Introduction

This is a brief workflow highlighting the exploratory analysis of survey data mined to assist in the writing of the manuscript, “Gender Disparities Persist in Endoscopy Suite” (Rabinowitz, et al.). Where appropriate, samples of the exact R syntax used will be displayed, along with the corresponding output (tabular data, graphical plots, maps, etc.).




DATA CLEAN-UP


require(broom)
require(dplyr)

SURVEY  <-
    GENDER_DIFF_DATA_LABELS %>% 
    filter( COMPLETE != "Incomplete" &
            BIRTHSEX != "OTHER" &
            !is.na(BIRTHSEX) ) %>% 
   select( RECORD_ID, BIRTHSEX, RACE_SOUTHASIAN:RACE_OTHER, AGE, TRAINING_LEVEL, HEIGHT, GLOVE, GLOVE_SIZE_AVAILABLE, PERFORMANCE_HOURS, TEACHER_GENDER_PREFERENCE,
           FEMALE_TRAINERS, MALE_TRAINERS, EVER_INJURED, EXPERIENCED_TRANSIENT_PAIN_NO, EXPERIENCED_TRANSIENT_PAIN_HAND, EXPERIENCED_TRANSIENT_PAIN_NECK_SHOULDER,
           EXPERIENCED_TRANSIENT_PAIN_BACK, EXPERIENCED_TRANSIENT_PAIN_LEG, EXPERIENCED_TRANSIENT_PAIN_FOOT, GROWING_PAINS,
           FELLOWSHIP_PREGNANCY, starts_with( c("PREGNANCY", "POSTPARTUM")),
           FELLOWSHIP_FORMAL_ERGO_TRAINING, INFORMAL_TRAINING, TRAINING_TECHNIQUES_POSTURAL, TRAINING_TECHNIQUES_BEDHEIGHT, TRAINING_TECHNIQUES_BEDANGLE,
           TRAINING_TECHNIQUES_MONITORHEIGHT, TRAINING_TECHNIQUES_MUSCULOSKELETAL, TRAINING_TECHNIQUES_EXERCISE_STRETCHING, TRAINING_TECHNIQUES_DIAL_EXTENDERS,
           TRAINING_TECHNIQUES_PEDIATRIC_COLONOSCOPE, ERGO_TRAINING_BUDGET, ERGO_FEEDBACK, ERGO_FEEDBACK_BY_WHOM, ERGO_OPTIMIZATION, GLOVE_SIZE_AVAILABLE,
           DIAL_EXTENDERS_AVAILABLE, DIAL_EXTENDERS_ENCOURAGED, DIAL_EXTENDERS_FEMALEATT, DIAL_EXTENDERS_MALEATT, PEDI_COLONOSCOPES_AVAILABLE,
           LEAD_APRONS_AVAILABLE,  LEAD_APRONS_LW_ONEPIECE, LEAD_APRONS_LW_TWOPIECE, LEAD_APRONS_STANDARD_ONEPIECE, LEAD_APRONS_STANDARD_TWOPIECE,
           LEAD_APRONS_DOUBLE, LEAD_APRONS_THYROID, LEAD_APRONS_MATERNALDOS, LEAD_APRONS_FETALDOS,
           ERGO_FORMAL_TIMEOUT_PRIOR, ERGO_INFORMAL_TIMEOUT_PRIOR, MONITORS_ADJUSTABLE, TEACHER_SENSITIVITY_STATURE_HANDSIZE,
           TEACHER_SENSITIVITY_BY_GENDER, TACTILE_INSTRUCTION_FROM_MALES, TACTILE_INSTRUCTION_FROM_FEMALES,
           COMFORTABLE_ASKING_NURSES, ASK_NURSES_ONCE, ASK_NURSES_TWICE, ASK_NURSES_MORE,
           COMFORTABLE_ASKING_TECHS, MALE_ATTENDINGS_ASKING, FEMALE_ATTENDINGS_ASKING,
           RECOGNIZED_RESPECTED_ES_STAFF, RECOGNIZED_RESPECTED_ANESTHETISTS, RECOGNIZED_RESPECTED_GASTRO_ATTENDING, FIRST_NAME_NO_PERMISSION, 
           ERGO_TRAINING_MANDATORY, ERGO_OPTIMIZAITON_BUDGET_REQUIRED, EXPERIENCE_IMPROVED_DIAL_EXTENDERS, EXPERIENCE_IMPROVED_PEDI_COLONOSCOPES, EXPERIENCE_IMPROVED_APRONS,
           ENDO_TEACHERS_FORMAL_TRAINING_REQUIRED,
           ERGONOMIC_IMPORTANCE, MITIGATING_MUSCLE_STRAIN, BED_POSITION, ENDO_TRAINER_POSITION, WHEN_DISABILITY_INSURANCE) %>% 
  
    mutate(AGE2 = ifelse( AGE %in% c('< 30', '30-34', '35-40'), AGE, '> 40' )) %>% 
  
  
    mutate( RACE = ifelse( RACE_HISPANIC == "Y", "HISPANIC",
                   ifelse( RACE_WHITE == "Y", "WHITE",
                   ifelse( RACE_BLACK == "Y", "BLACK",
                   ifelse (RACE_SOUTHASIAN == "Y", "ASIAN SOUTH",
                   ifelse (RACE_EASTASIAN == "Y", "ASIAN EAST",
                   ifelse (RACE_NATIVEAMER == "Y", "OTHER",
                   ifelse (RACE_PACIFICISLAND == "Y", "OTHER",
                   ifelse (RACE_OTHER == "Y", "OTHER", "OTHER" )))))))),
            RACE = factor(RACE, levels= c('ASIAN EAST', 'ASIAN SOUTH', 'BLACK', 'HISPANIC', 'WHITE', 'OTHER'))) %>%
  
    mutate( BIRTHSEX = factor( BIRTHSEX,  levels= c("F","M") ))  %>% 
    mutate (AGE2 = factor(AGE2, levels = c('< 30', '30-34', '35-40', '> 40'))) %>%  

    mutate (RACE2 = case_when( RACE != "WHITE" ~ 'NON-WHITE',
                               TRUE ~ 'WHITE'),
            RACE2 = factor(RACE2, levels = c("WHITE", "NON-WHITE"))) %>% 
            

                           
                           
    mutate( TRAINING_LEVEL = factor (TRAINING_LEVEL, levels= c('First year fellow','Second year fellow', 'Third year fellow', 'Advanced fellow'))) %>% 
    mutate( TRAINING_LEVEL = recode_factor( TRAINING_LEVEL, 'First year fellow'= 'First Year', 
                                                            'Second year fellow'= 'Second Year', 
                                                            'Third year fellow' = 'Third Year', 
                                                            'Advanced fellow' = "Advanced", .ordered = T) ) %>% 
   mutate( HEIGHT2 = factor(HEIGHT, levels= c("< 5'", "5-5'3", "5'4-5'6", "5'7-5'9", "5'10-6'", "6'1-6'4", "> 6'4"))) %>% 
   mutate( PERFORMANCE_HOURS = factor(PERFORMANCE_HOURS),
           PERFORMANCE_HOURS = recode_factor(PERFORMANCE_HOURS, "< 10" = "< 10",
                                                               "10-20" = "10-20",
                                                               "21-30" = "21-30",
                                                               "31-40" = "31-40",
                                                               .default = "> 40")) %>% 
  
  mutate(TEACHER_GENDER_PREFERENCE = factor(TEACHER_GENDER_PREFERENCE),
         TEACHER_GENDER_PREFERENCE = recode_factor(TEACHER_GENDER_PREFERENCE, "Yes" = "Yes",
                                                                      .default = "No")) %>% 
  
  mutate( FEMALE_TRAINERS = factor(FEMALE_TRAINERS),
          FEMALE_TRAINERS = recode_factor(FEMALE_TRAINERS, 'None' = 'None',
                                                           '1-2' = '1-2',
                                                           '3-5' = '3-5',
                                                           '6-10' = '6-10',
                                                           .default = '> 10' )) %>% 
  mutate( MALE_TRAINERS = factor(MALE_TRAINERS),
          MALE_TRAINERS = recode_factor(MALE_TRAINERS, 'None' = 'None',
                                                           '1-2' = '1-2',
                                                           '3-5' = '3-5',
                                                           '6-10' = '6-10',
                                                          .default = "> 10") ) %>% 
  
  mutate( EVER_INJURED = factor(EVER_INJURED)) %>% 
  mutate( EXPERIENCED_TRANSIENT_PAIN_NO = factor(EXPERIENCED_TRANSIENT_PAIN_NO)) %>% 
  mutate( EXPERIENCED_TRANSIENT_PAIN_HAND = factor(EXPERIENCED_TRANSIENT_PAIN_HAND)) %>% 
  mutate( EXPERIENCED_TRANSIENT_PAIN_NECK_SHOULDER = factor(EXPERIENCED_TRANSIENT_PAIN_NECK_SHOULDER)) %>% 
  mutate( EXPERIENCED_TRANSIENT_PAIN_BACK = factor(EXPERIENCED_TRANSIENT_PAIN_BACK)) %>% 
  mutate( EXPERIENCED_TRANSIENT_PAIN_LEG = factor(EXPERIENCED_TRANSIENT_PAIN_LEG)) %>% 
  mutate( EXPERIENCED_TRANSIENT_PAIN_FOOT = factor(EXPERIENCED_TRANSIENT_PAIN_FOOT))  %>% 
  
  mutate( FELLOWSHIP_PREGNANCY = factor(FELLOWSHIP_PREGNANCY),
          PREGNANCY_ERGO_DIFFICULTY = factor(PREGNANCY_ERGO_DIFFICULTY),
          PREGNANCY_ERGO_INJURY = factor( PREGNANCY_ERGO_INJURY),
          POSTPARTUM_ERGO_INJURY = factor( POSTPARTUM_ERGO_INJURY)) %>% 
  
  mutate( GROWING_PAINS = factor(GROWING_PAINS)) %>% 
  mutate( FELLOWSHIP_FORMAL_ERGO_TRAINING = factor(FELLOWSHIP_FORMAL_ERGO_TRAINING)) %>% 
  mutate( INFORMAL_TRAINING = factor(INFORMAL_TRAINING)) %>% 
  mutate( TRAINING_TECHNIQUES_POSTURAL = factor(TRAINING_TECHNIQUES_POSTURAL)) %>% 
  mutate( TRAINING_TECHNIQUES_BEDHEIGHT = factor(TRAINING_TECHNIQUES_BEDHEIGHT)) %>% 
  mutate( TRAINING_TECHNIQUES_BEDANGLE = factor(TRAINING_TECHNIQUES_BEDANGLE)) %>% 
  mutate( TRAINING_TECHNIQUES_MONITORHEIGHT = factor(TRAINING_TECHNIQUES_MONITORHEIGHT)) %>% 
  mutate( TRAINING_TECHNIQUES_MUSCULOSKELETAL = factor(TRAINING_TECHNIQUES_MUSCULOSKELETAL)) %>% 
  mutate( TRAINING_TECHNIQUES_EXERCISE_STRETCHING = factor(TRAINING_TECHNIQUES_EXERCISE_STRETCHING)) %>% 
  mutate( TRAINING_TECHNIQUES_DIAL_EXTENDERS = factor(TRAINING_TECHNIQUES_DIAL_EXTENDERS)) %>% 
  mutate( TRAINING_TECHNIQUES_PEDIATRIC_COLONOSCOPE = factor(TRAINING_TECHNIQUES_PEDIATRIC_COLONOSCOPE)) %>% 
  
  mutate( ERGO_TRAINING_BUDGET = factor(ERGO_TRAINING_BUDGET),
          ERGO_TRAINING_BUDGET = recode_factor(ERGO_TRAINING_BUDGET, 'Y' = 'Y',
                                                                     'N' = 'N',
                                                                     "Don't know" = 'DK', .ordered= T)) %>% 
  

  mutate( ERGO_FEEDBACK = factor(ERGO_FEEDBACK),
          ERGO_FEEDBACK = recode_factor(ERGO_FEEDBACK, 'Never' = 'Never',
                                                       'Rarely' = 'Rarely',
                                                       'Sometimes' = 'Sometimes',
                                                        'Often' = 'Often', .ordered = T )) %>% 
            
  mutate( ERGO_FEEDBACK_BY_WHOM = factor(ERGO_FEEDBACK_BY_WHOM),
          ERGO_FEEDBACK_BY_WHOM = recode_factor(ERGO_FEEDBACK_BY_WHOM, 'I do not or rarely receive ergonomic feedback' = "Do not/rarely received feedback",
                                                                     'Mostly male endoscopy teachers' = 'Mostly male teachers',
                                                                     'Mostly female endoscopy teachers' = 'Mostly female teachers',
                                                                     'Both male and female endoscopy teachers equally' = 'Both equally' , .ordered = T)) %>% 
  
  
  mutate( ERGO_OPTIMIZATION = factor(ERGO_OPTIMIZATION),
          ERGO_OPTIMIZATION = recode_factor(ERGO_OPTIMIZATION, 'Y' = 'Y',
                                                                     'N' = 'N',
                                                                     "Don't know" = 'DK', .ordered= T)) %>% 
  
  mutate( GLOVE_SIZE_AVAILABLE = factor(GLOVE_SIZE_AVAILABLE)) %>% 
  
  mutate( DIAL_EXTENDERS_AVAILABLE = factor(DIAL_EXTENDERS_AVAILABLE),
          DIAL_EXTENDERS_AVAILABLE = recode_factor(DIAL_EXTENDERS_AVAILABLE, 'Y' = 'Y',
                                                                             'N' = 'N',
                                                                             "Don't know" = 'DK', .ordered= T)) %>% 
  
  mutate( DIAL_EXTENDERS_ENCOURAGED = factor(DIAL_EXTENDERS_ENCOURAGED),
          DIAL_EXTENDERS_ENCOURAGED = recode_factor(DIAL_EXTENDERS_ENCOURAGED, 'Y' = 'Y',
                                                                                 'N' = 'N',
                                                                                 "Don't use" = 'DU', .ordered= T)) %>% 
  
  

  mutate( DIAL_EXTENDERS_FEMALEATT = factor(DIAL_EXTENDERS_FEMALEATT),
          DIAL_EXTENDERS_FEMALEATT = recode_factor(DIAL_EXTENDERS_FEMALEATT, 'Not Likely' = 'Not Likely',
                                                                             'Somewhat Likely' = 'Somewhat Likely',
                                                                             'Sometimes' = 'Sometimes',
                                                                             'Verly Likely' = 'Very Likely',
                                                                             'NA' = 'NA', .ordered = T )) %>% 

  mutate( DIAL_EXTENDERS_MALEATT = factor(DIAL_EXTENDERS_MALEATT),
          DIAL_EXTENDERS_MALEATT = recode_factor(DIAL_EXTENDERS_MALEATT, 'Not Likely' = 'Not Likely',
                                                                             'Somewhat Likely' = 'Somewhat Likely',
                                                                             'Sometimes' = 'Sometimes',
                                                                             'Verly Likely' = 'Very Likely',
                                                                             'NA' = 'NA', .ordered = T ))  %>% 
    mutate( PEDI_COLONOSCOPES_AVAILABLE = factor(PEDI_COLONOSCOPES_AVAILABLE),
          PEDI_COLONOSCOPES_AVAILABLE = recode_factor(PEDI_COLONOSCOPES_AVAILABLE, 'Y' = 'Y',
                                                                                   'N' = 'N',
                                                                                   "Don't know" = 'DK', .ordered= T)) %>% 
  
   mutate( LEAD_APRONS_AVAILABLE = factor(LEAD_APRONS_AVAILABLE),
          LEAD_APRONS_AVAILABLE = recode_factor(LEAD_APRONS_AVAILABLE, 'N' = 'N',
                                                                       'Y' = 'Y',
                                                                       "Don't know" = "DK",.ordered= T)) %>% 
  
   mutate( TEACHER_SENSITIVITY_BY_GENDER = factor(TEACHER_SENSITIVITY_BY_GENDER),
          TEACHER_SENSITIVITY_BY_GENDER = recode_factor(TEACHER_SENSITIVITY_BY_GENDER, 'Male' = 'Male',
                                                           'Female' = 'Female',
                                                           'Both equally' = 'Both Equally',
                                                           'I have not had female endoscopy teachers' = 'Never had female teacher',
                                                           'Not sure' = 'Not Sure', .ordered= T ))  %>% 
  
   mutate( TACTILE_INSTRUCTION_FROM_MALES = factor(TACTILE_INSTRUCTION_FROM_MALES),
          TACTILE_INSTRUCTION_FROM_MALES = recode_factor(TACTILE_INSTRUCTION_FROM_MALES, 'No' = 'No',
                                                                                         'Yes, rarely' = 'Rarely',
                                                                                         'Yes, often' = 'Often', .ordered= T)) %>% 
  
   mutate( TACTILE_INSTRUCTION_FROM_FEMALES = factor(TACTILE_INSTRUCTION_FROM_FEMALES),
          TACTILE_INSTRUCTION_FROM_FEMALES = recode_factor(TACTILE_INSTRUCTION_FROM_FEMALES, 'No' = 'No',
                                                                                         'Yes, rarely' = 'Rarely',
                                                                                         'Yes, often' = 'Often', .ordered= T)) %>% 
  
   mutate( NURSES_ASKING = ifelse( ASK_NURSES_MORE == "Y", "More than Twice",
                           ifelse( ASK_NURSES_TWICE == "Y", "Twice",
                           ifelse( ASK_NURSES_ONCE == "Y", "Once", NA))),
           
           NURSES_ASKING = factor(NURSES_ASKING),
           NURSES_ASKING = recode_factor(NURSES_ASKING, "Once" = "Once",
                                                        "Twice" = "Twice",
                                                        "More than Twicce" = "More than Twice", .ordered=T),
  
           MALE_ATTENDINGS_ASKING = factor(MALE_ATTENDINGS_ASKING),
           MALE_ATTENDINGS_ASKING = recode_factor(MALE_ATTENDINGS_ASKING, "Once" = "Once",
                                                                          "Twice" = "Twice",
                                                                           "More than Twice" = "More than Twice", .ordered=T),

           FEMALE_ATTENDINGS_ASKING = factor(FEMALE_ATTENDINGS_ASKING),
           FEMALE_ATTENDINGS_ASKING = recode_factor(FEMALE_ATTENDINGS_ASKING, "Once" = "Once",
                                                                             "Twice" = "Twice",
                                                                             "More than twice" = "More than Twice",
                                                                             "Not applicable, I do not work with any female attendings" = "Don't work with FemAtt", .ordered=T)) %>% 
  
   mutate( ERGO_TRAINING_MANDATORY = factor(ERGO_TRAINING_MANDATORY),
           ERGO_TRAINING_MANDATORY = recode_factor(ERGO_TRAINING_MANDATORY, 'Y' = 'Y',
                                                                              'N' = 'N',
                                                                            "Don't know" = 'DK', .ordered= T) ,
    
           ERGO_OPTIMIZAITON_BUDGET_REQUIRED = factor(ERGO_OPTIMIZAITON_BUDGET_REQUIRED),
           ERGO_OPTIMIZAITON_BUDGET_REQUIRED = recode_factor(ERGO_OPTIMIZAITON_BUDGET_REQUIRED, 'Y' = 'Y',
                                                                              'N' = 'N',
                                                                            "Don't know" = 'DK', .ordered= T),
                 
                                                
          EXPERIENCE_IMPROVED_DIAL_EXTENDERS = factor(EXPERIENCE_IMPROVED_DIAL_EXTENDERS),
           EXPERIENCE_IMPROVED_DIAL_EXTENDERS = recode_factor(EXPERIENCE_IMPROVED_DIAL_EXTENDERS, 'Y' = 'Y',
                                                                              'N' = 'N',
                                                                            "Don't know" = 'DK', .ordered= T),
     
           EXPERIENCE_IMPROVED_PEDI_COLONOSCOPES = factor(EXPERIENCE_IMPROVED_PEDI_COLONOSCOPES),
           EXPERIENCE_IMPROVED_PEDI_COLONOSCOPES = recode_factor(EXPERIENCE_IMPROVED_PEDI_COLONOSCOPES, 'Y' = 'Y',
                                                                              'N' = 'N',
                                                                            "Don't know" = 'DK', .ordered= T) ,
     
           EXPERIENCE_IMPROVED_APRONS = factor(EXPERIENCE_IMPROVED_APRONS),
           EXPERIENCE_IMPROVED_APRONS = recode_factor(EXPERIENCE_IMPROVED_APRONS, 'Y' = 'Y',
                                                                              'N' = 'N',
                                                                            "Don't know" = 'DK', .ordered= T),
     
           ENDO_TEACHERS_FORMAL_TRAINING_REQUIRED = factor(ENDO_TEACHERS_FORMAL_TRAINING_REQUIRED),
           ENDO_TEACHERS_FORMAL_TRAINING_REQUIRED = recode_factor(ENDO_TEACHERS_FORMAL_TRAINING_REQUIRED, 'Y' = 'Y',
                                                                              'N' = 'N',
                                                                            "Don't know" = 'DK', .ordered= T))  %>% 
  
  
   mutate( ERGONOMIC_IMPORTANCE = factor(ERGONOMIC_IMPORTANCE),
           ERGONOMIC_IMPORTANCE = recode_factor(ERGONOMIC_IMPORTANCE, 'Both A and C' = 'Correct',
                                                                       .default = 'Incorrect', .ordered= T) ,
    
           MITIGATING_MUSCLE_STRAIN = factor(MITIGATING_MUSCLE_STRAIN),
           MITIGATING_MUSCLE_STRAIN = recode_factor(MITIGATING_MUSCLE_STRAIN, 'All of the above' = 'Correct',
                                                                       .default = 'Incorrect', .ordered= T) ,
    
           BED_POSITION = factor(BED_POSITION),
           BED_POSITION = recode_factor(BED_POSITION, '10 cm below elbow height' = 'Correct',
                                                                       .default = 'Incorrect', .ordered= T) ,
    
           ENDO_TRAINER_POSITION = factor(ENDO_TRAINER_POSITION),
           ENDO_TRAINER_POSITION = recode_factor(ENDO_TRAINER_POSITION, 'At the foot of the bed, on the same side of the trainee.' = 'Correct',
                                                                       .default = 'Incorrect', .ordered= T) ,
    
           WHEN_DISABILITY_INSURANCE = factor(WHEN_DISABILITY_INSURANCE),
           WHEN_DISABILITY_INSURANCE = recode_factor(WHEN_DISABILITY_INSURANCE, 'During training' = 'Correct',
                                                                       .default = 'Incorrect', .ordered= T) ) 


Here’s a glimpse of the structure of the resulting dataset SURVEY:

glimpse(SURVEY)
## Rows: 236
## Columns: 93
## $ RECORD_ID                                 <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 1…
## $ BIRTHSEX                                  <fct> F, F, F, M, F, F, F, F, F, M…
## $ RACE_SOUTHASIAN                           <chr> "N", "N", "N", "Y", "N", "N"…
## $ RACE_EASTASIAN                            <chr> "N", "N", "N", "N", "Y", "Y"…
## $ RACE_WHITE                                <chr> "N", "Y", "Y", "N", "N", "N"…
## $ RACE_BLACK                                <chr> "N", "N", "N", "N", "N", "N"…
## $ RACE_HISPANIC                             <chr> "Y", "N", "N", "N", "N", "N"…
## $ RACE_NATIVEAMER                           <chr> "N", "N", "N", "N", "N", "N"…
## $ RACE_PACIFICISLAND                        <chr> "N", "N", "N", "N", "N", "N"…
## $ RACE_OTHER                                <chr> "N", "N", "N", "N", "N", "N"…
## $ AGE                                       <chr> "30-34", "30-34", "30-34", "…
## $ TRAINING_LEVEL                            <ord> Third Year, Third Year, Firs…
## $ HEIGHT                                    <chr> "5'4-5'6", "5'4-5'6", "5'4-5…
## $ GLOVE                                     <dbl> 6.5, 6.5, 6.0, 7.0, 6.5, 5.5…
## $ GLOVE_SIZE_AVAILABLE                      <fct> Y, Y, Y, Y, N, N, Y, Y, N, Y…
## $ PERFORMANCE_HOURS                         <fct> 10-20, < 10, 10-20, 31-40, 1…
## $ TEACHER_GENDER_PREFERENCE                 <fct> No, No, Yes, No, No, No, No,…
## $ FEMALE_TRAINERS                           <fct> None, 6-10, 6-10, 6-10, 6-10…
## $ MALE_TRAINERS                             <fct> 6-10, > 10, > 10, > 10, > 10…
## $ EVER_INJURED                              <fct> N, N, N, N, Y, N, N, N, N, N…
## $ EXPERIENCED_TRANSIENT_PAIN_NO             <fct> Y, N, N, N, N, N, N, N, N, N…
## $ EXPERIENCED_TRANSIENT_PAIN_HAND           <fct> N, Y, Y, Y, Y, Y, Y, N, N, Y…
## $ EXPERIENCED_TRANSIENT_PAIN_NECK_SHOULDER  <fct> N, Y, Y, Y, Y, N, Y, Y, Y, Y…
## $ EXPERIENCED_TRANSIENT_PAIN_BACK           <fct> N, Y, Y, Y, Y, N, Y, N, N, Y…
## $ EXPERIENCED_TRANSIENT_PAIN_LEG            <fct> N, N, Y, Y, N, N, N, N, N, N…
## $ EXPERIENCED_TRANSIENT_PAIN_FOOT           <fct> N, N, Y, Y, N, Y, N, N, N, N…
## $ GROWING_PAINS                             <fct> NA, Y, Y, Y, Y, N, N, Y, N, …
## $ FELLOWSHIP_PREGNANCY                      <fct> N, Y, N, NA, N, N, N, N, N, …
## $ PREGNANCY_ERGO_DIFFICULTY                 <fct> NA, Y, NA, NA, NA, NA, NA, N…
## $ PREGNANCY_ERGO_INJURY                     <fct> NA, N, NA, NA, NA, NA, NA, N…
## $ POSTPARTUM_ERGO_INJURY                    <fct> NA, N, NA, NA, NA, NA, NA, N…
## $ FELLOWSHIP_FORMAL_ERGO_TRAINING           <fct> N, N, N, N, N, N, N, Y, N, Y…
## $ INFORMAL_TRAINING                         <fct> Y, Y, Y, Y, Y, Y, N, Y, Y, Y…
## $ TRAINING_TECHNIQUES_POSTURAL              <fct> Y, N, Y, Y, N, Y, Y, Y, N, Y…
## $ TRAINING_TECHNIQUES_BEDHEIGHT             <fct> Y, Y, Y, Y, Y, Y, Y, Y, Y, Y…
## $ TRAINING_TECHNIQUES_BEDANGLE              <fct> Y, N, Y, Y, N, Y, Y, N, Y, Y…
## $ TRAINING_TECHNIQUES_MONITORHEIGHT         <fct> Y, N, N, Y, N, Y, Y, N, Y, Y…
## $ TRAINING_TECHNIQUES_MUSCULOSKELETAL       <fct> Y, N, N, Y, N, N, N, Y, Y, N…
## $ TRAINING_TECHNIQUES_EXERCISE_STRETCHING   <fct> N, N, N, N, N, N, N, N, N, N…
## $ TRAINING_TECHNIQUES_DIAL_EXTENDERS        <fct> N, N, Y, N, Y, N, Y, N, N, N…
## $ TRAINING_TECHNIQUES_PEDIATRIC_COLONOSCOPE <fct> Y, N, Y, Y, Y, N, Y, Y, Y, N…
## $ ERGO_TRAINING_BUDGET                      <ord> DK, N, DK, DK, N, DK, DK, N,…
## $ ERGO_FEEDBACK                             <ord> Sometimes, Rarely, Sometimes…
## $ ERGO_FEEDBACK_BY_WHOM                     <ord> Mostly male teachers, Mostly…
## $ ERGO_OPTIMIZATION                         <ord> DK, N, N, Y, N, N, Y, DK, N,…
## $ DIAL_EXTENDERS_AVAILABLE                  <ord> DK, N, Y, Y, Y, N, Y, DK, N,…
## $ DIAL_EXTENDERS_ENCOURAGED                 <ord> DU, N, Y, Y, Y, DU, Y, NA, N…
## $ DIAL_EXTENDERS_FEMALEATT                  <ord> NA, NA, Not likely, NA, Very…
## $ DIAL_EXTENDERS_MALEATT                    <ord> NA, NA, Very likely, NA, Ver…
## $ PEDI_COLONOSCOPES_AVAILABLE               <ord> Y, Y, Y, Y, Y, Y, Y, Y, Y, Y…
## $ LEAD_APRONS_AVAILABLE                     <ord> Y, Y, DK, Y, DK, N, DK, Y, N…
## $ LEAD_APRONS_LW_ONEPIECE                   <chr> "N", "N", "N", "Y", "N", "Y"…
## $ LEAD_APRONS_LW_TWOPIECE                   <chr> "Y", "N", "N", "Y", "N", "Y"…
## $ LEAD_APRONS_STANDARD_ONEPIECE             <chr> "N", "Y", "N", "Y", "N", "Y"…
## $ LEAD_APRONS_STANDARD_TWOPIECE             <chr> "N", "Y", "N", "Y", "N", "Y"…
## $ LEAD_APRONS_DOUBLE                        <chr> "N", "N", "N", "N", "N", "N"…
## $ LEAD_APRONS_THYROID                       <chr> "N", "N", "N", "Y", "N", "N"…
## $ LEAD_APRONS_MATERNALDOS                   <chr> "N", "N", "N", "N", "N", "N"…
## $ LEAD_APRONS_FETALDOS                      <chr> "N", "N", "N", "N", "N", "N"…
## $ ERGO_FORMAL_TIMEOUT_PRIOR                 <chr> "N", "N", "N", "N", "N", "N"…
## $ ERGO_INFORMAL_TIMEOUT_PRIOR               <chr> "Y", "N", "Y", "Y", "Y", "Y"…
## $ MONITORS_ADJUSTABLE                       <chr> "Y", "N", "N", "Y", "N", "Y"…
## $ TEACHER_SENSITIVITY_STATURE_HANDSIZE      <chr> "Y", "N", "N", "Y", "N", "N"…
## $ TEACHER_SENSITIVITY_BY_GENDER             <ord> Never had female teacher, No…
## $ TACTILE_INSTRUCTION_FROM_MALES            <ord> Often, No, No, No, No, No, O…
## $ TACTILE_INSTRUCTION_FROM_FEMALES          <ord> No, No, No, Rarely, Rarely, …
## $ COMFORTABLE_ASKING_NURSES                 <chr> "Y", "Y", "Y", "Y", "Y", "Y"…
## $ ASK_NURSES_ONCE                           <chr> "Y", "Y", "N", "N", "N", "Y"…
## $ ASK_NURSES_TWICE                          <chr> "N", "N", "Y", "N", "N", "N"…
## $ ASK_NURSES_MORE                           <chr> "N", "N", "N", "Y", "Y", "N"…
## $ COMFORTABLE_ASKING_TECHS                  <chr> "Y", "Y", "Y", "Y", "Y", "Y"…
## $ MALE_ATTENDINGS_ASKING                    <ord> Once, NA, More than twice, M…
## $ FEMALE_ATTENDINGS_ASKING                  <ord> Don't work with FemAtt, NA, …
## $ RECOGNIZED_RESPECTED_ES_STAFF             <chr> "Y", "Y", "Y", "Y", "Y", "Y"…
## $ RECOGNIZED_RESPECTED_ANESTHETISTS         <chr> "Y", "Y", "Y", "Y", "Y", "N"…
## $ RECOGNIZED_RESPECTED_GASTRO_ATTENDING     <chr> "Y", "Y", "Y", "Y", "Y", "Y"…
## $ FIRST_NAME_NO_PERMISSION                  <chr> "N", "Y", "Y", "N", "N", "Y"…
## $ ERGO_TRAINING_MANDATORY                   <ord> Y, Y, Y, Y, Y, Y, Y, Y, Y, Y…
## $ ERGO_OPTIMIZAITON_BUDGET_REQUIRED         <ord> Y, Y, Y, Y, Y, Y, Y, Y, Y, Y…
## $ EXPERIENCE_IMPROVED_DIAL_EXTENDERS        <ord> DK, DK, Y, N, Y, Y, Y, DK, Y…
## $ EXPERIENCE_IMPROVED_PEDI_COLONOSCOPES     <ord> DK, N, Y, N, Y, Y, Y, N, Y, …
## $ EXPERIENCE_IMPROVED_APRONS                <ord> N, Y, Y, N, Y, Y, Y, DK, Y, …
## $ ENDO_TEACHERS_FORMAL_TRAINING_REQUIRED    <ord> Y, Y, Y, Y, Y, Y, Y, Y, Y, Y…
## $ ERGONOMIC_IMPORTANCE                      <ord> Incorrect, Correct, Correct,…
## $ MITIGATING_MUSCLE_STRAIN                  <ord> Incorrect, Correct, Correct,…
## $ BED_POSITION                              <ord> Incorrect, Incorrect, Correc…
## $ ENDO_TRAINER_POSITION                     <ord> Incorrect, Correct, Incorrec…
## $ WHEN_DISABILITY_INSURANCE                 <ord> Incorrect, Correct, Correct,…
## $ AGE2                                      <fct> 30-34, 30-34, 30-34, 30-34, …
## $ RACE                                      <fct> HISPANIC, WHITE, WHITE, ASIA…
## $ RACE2                                     <fct> NON-WHITE, WHITE, WHITE, NON…
## $ HEIGHT2                                   <fct> 5'4-5'6, 5'4-5'6, 5'4-5'6, 6…
## $ NURSES_ASKING                             <ord> Once, Once, Twice, More than…


DEMOGRAPHIC DATA


Question 1: Distribution of RACE x AGE


#Female/Male Totals

sqldf("select BIRTHSEX,
              count(RECORD_ID) as N
       from SURVEY 
       where BIRTHSEX in ('F','M') 
       group by 1")
##   BIRTHSEX   N
## 1        F 113
## 2        M 123
#Chi-Square Test of Proportions

fcount <- length(SURVEY$BIRTHSEX[SURVEY$BIRTHSEX =="F"])
mcount <- length(SURVEY$BIRTHSEX[SURVEY$BIRTHSEX =="M"])
fcount
## [1] 113
mcount
## [1] 123
females_males = c(fcount, mcount )
chisq.test(females_males, p= c(1/2, 1/2))
## 
##  Chi-squared test for given probabilities
## 
## data:  females_males
## X-squared = 0.42373, df = 1, p-value = 0.5151
#SJPlot cross tabulation with Chi-Square/df
  
plot_xtab(SURVEY$AGE2, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Age Distribution by Birth Sex",
          axis.titles = c('Respondents Age Bands'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

SUBSET2 <-
SURVEY %>% 
  filter( !is.na(AGE2)) %>% 
  mutate(BIRTHSEX = recode_factor( BIRTHSEX, "M" = "MALES", "F" = "FEMALES", .ordered=T))


ggplot(  SUBSET2, aes(x= AGE2)) + facet_grid( SUBSET2$BIRTHSEX ) +  
  geom_bar(aes(fill= BIRTHSEX) ) +
  stat_count(geom="text", aes(label=..count..), vjust= -.1) + 
  scale_fill_manual( values = c( "MALES"="darkgrey", "FEMALES"="#006cc5"), guide = "none" )+
  theme_538()  +
  xlab("Age Bands")+ ylab("Counts") 

#Alternative View, if Birth Sex by Age is desired 


CrossTable(SURVEY$AGE, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  236 
## 
##  
##              | SURVEY$BIRTHSEX 
##   SURVEY$AGE |         F |         M | Row Total | 
## -------------|-----------|-----------|-----------|
##         < 30 |        10 |         8 |        18 | 
##              |     0.556 |     0.444 |     0.076 | 
##              |     0.088 |     0.065 |           | 
## -------------|-----------|-----------|-----------|
##        30-34 |        93 |        90 |       183 | 
##              |     0.508 |     0.492 |     0.775 | 
##              |     0.823 |     0.732 |           | 
## -------------|-----------|-----------|-----------|
##        35-40 |        10 |        24 |        34 | 
##              |     0.294 |     0.706 |     0.144 | 
##              |     0.088 |     0.195 |           | 
## -------------|-----------|-----------|-----------|
##        41-50 |         0 |         1 |         1 | 
##              |     0.000 |     1.000 |     0.004 | 
##              |     0.000 |     0.008 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       113 |       123 |       236 | 
##              |     0.479 |     0.521 |           | 
## -------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  6.624273     d.f. =  3     p =  0.08488823 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.05670541 
## 
## 



Question 2: Distribution of RACE x BIRTHSEX


#SJPlot cross tabulation with Chi-Square/df
  
plot_xtab(SURVEY$RACE, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Race Distribution by Birth Sex",
          axis.titles = c('Race Categories '), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

SUBSET2 <-
SURVEY %>% 
  filter( !is.na(RACE)) %>% 
  mutate(BIRTHSEX = recode_factor( BIRTHSEX, "M" = "MALES", "F" = "FEMALES", .ordered=T))


ggplot(  SUBSET2, aes(x= RACE)) + facet_grid( SUBSET2$BIRTHSEX ) +  
  geom_bar(aes(fill= BIRTHSEX) ) +
  stat_count(geom="text", aes(label=..count..), vjust= -.05) + 
  scale_fill_manual( values = c( "MALES"="darkgrey", "FEMALES"="#006cc5"), guide = "none" )+
  theme_538()  +
  xlab("Race/Ethnicity Categories")+ ylab("Counts") 

#Alternative View, if Birth Sex by Race is desired 


CrossTable(SURVEY$RACE, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r = T, prop.t=F, chisq=T, fisher=F)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  236 
## 
##  
##              | SURVEY$BIRTHSEX 
##  SURVEY$RACE |         F |         M | Row Total | 
## -------------|-----------|-----------|-----------|
##   ASIAN EAST |        24 |        13 |        37 | 
##              |     0.649 |     0.351 |     0.157 | 
##              |     0.212 |     0.106 |           | 
## -------------|-----------|-----------|-----------|
##  ASIAN SOUTH |        33 |        23 |        56 | 
##              |     0.589 |     0.411 |     0.237 | 
##              |     0.292 |     0.187 |           | 
## -------------|-----------|-----------|-----------|
##        BLACK |         5 |         6 |        11 | 
##              |     0.455 |     0.545 |     0.047 | 
##              |     0.044 |     0.049 |           | 
## -------------|-----------|-----------|-----------|
##     HISPANIC |         9 |         6 |        15 | 
##              |     0.600 |     0.400 |     0.064 | 
##              |     0.080 |     0.049 |           | 
## -------------|-----------|-----------|-----------|
##        WHITE |        36 |        68 |       104 | 
##              |     0.346 |     0.654 |     0.441 | 
##              |     0.319 |     0.553 |           | 
## -------------|-----------|-----------|-----------|
##        OTHER |         6 |         7 |        13 | 
##              |     0.462 |     0.538 |     0.055 | 
##              |     0.053 |     0.057 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       113 |       123 |       236 | 
##              |     0.479 |     0.521 |           | 
## -------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  15.27367     d.f. =  5     p =  0.009254827 
## 
## 
## 



Question 3: Distribution of TRAINING LEVEL x BIRTHSEX


#SJPlot cross tabulation with Chi-Square/df
  
plot_xtab(SURVEY$TRAINING_LEVEL, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Training Levels by Birth Sex",
          axis.titles = c('Training Levels'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Training Level is desired 


CrossTable(SURVEY$TRAINING_LEVEL, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=F)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  235 
## 
##  
##                       | SURVEY$BIRTHSEX 
## SURVEY$TRAINING_LEVEL |         F |         M | Row Total | 
## ----------------------|-----------|-----------|-----------|
##            First Year |        43 |        36 |        79 | 
##                       |     0.544 |     0.456 |     0.336 | 
##                       |     0.381 |     0.295 |           | 
## ----------------------|-----------|-----------|-----------|
##           Second Year |        41 |        36 |        77 | 
##                       |     0.532 |     0.468 |     0.328 | 
##                       |     0.363 |     0.295 |           | 
## ----------------------|-----------|-----------|-----------|
##            Third Year |        23 |        42 |        65 | 
##                       |     0.354 |     0.646 |     0.277 | 
##                       |     0.204 |     0.344 |           | 
## ----------------------|-----------|-----------|-----------|
##              Advanced |         6 |         8 |        14 | 
##                       |     0.429 |     0.571 |     0.060 | 
##                       |     0.053 |     0.066 |           | 
## ----------------------|-----------|-----------|-----------|
##          Column Total |       113 |       122 |       235 | 
##                       |     0.481 |     0.519 |           | 
## ----------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  6.449267     d.f. =  3     p =  0.09168489 
## 
## 
## 



Question 4: Distribution of HEIGHT2 x BIRTHSEX


#SJPlot cross tabulation with Chi-Square/df
  
plot_xtab(SURVEY$HEIGHT2, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Height Bands by Birth Sex",
          axis.titles = c('Height Bands'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

SUBSET2 <-
SURVEY %>% 
  filter( !is.na(HEIGHT2)) %>% 
  mutate(BIRTHSEX = recode_factor( BIRTHSEX, "M" = "MALES", "F" = "FEMALES", .ordered=T))


ggplot(  SUBSET2, aes(x= HEIGHT2)) + facet_grid( SUBSET2$BIRTHSEX ) +  
  geom_bar(aes(fill= BIRTHSEX) ) +
  stat_count(geom="text", aes(label=..count..), vjust= -.3) + 
  scale_fill_manual( values = c( "MALES"="darkgrey", "FEMALES"="#006cc5"), guide = "none" )+
  theme_538()  +
  xlab("Height Bands")+ ylab("Counts") 

#Alternative View, if Birth Sex by Height is desired 



CrossTable(SURVEY$HEIGHT2, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r = F, prop.t=F, chisq=T, fisher=F)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  235 
## 
##  
##                | SURVEY$BIRTHSEX 
## SURVEY$HEIGHT2 |         F |         M | Row Total | 
## ---------------|-----------|-----------|-----------|
##           < 5' |         2 |         0 |         2 | 
##                |     0.018 |     0.000 |           | 
## ---------------|-----------|-----------|-----------|
##          5-5'3 |        38 |         2 |        40 | 
##                |     0.339 |     0.016 |           | 
## ---------------|-----------|-----------|-----------|
##        5'4-5'6 |        46 |        10 |        56 | 
##                |     0.411 |     0.081 |           | 
## ---------------|-----------|-----------|-----------|
##        5'7-5'9 |        25 |        41 |        66 | 
##                |     0.223 |     0.333 |           | 
## ---------------|-----------|-----------|-----------|
##        5'10-6' |         1 |        44 |        45 | 
##                |     0.009 |     0.358 |           | 
## ---------------|-----------|-----------|-----------|
##        6'1-6'4 |         0 |        24 |        24 | 
##                |     0.000 |     0.195 |           | 
## ---------------|-----------|-----------|-----------|
##          > 6'4 |         0 |         2 |         2 | 
##                |     0.000 |     0.016 |           | 
## ---------------|-----------|-----------|-----------|
##   Column Total |       112 |       123 |       235 | 
##                |     0.477 |     0.523 |           | 
## ---------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  128.2767     d.f. =  6     p =  2.963565e-25 
## 
## 
## 



Question 5: Distribution of GLOVE SIZE x BIRTHSEX


require(ggstatsplot)

#SJPlot cross tabulation with Chi-Square/df
  
plot_xtab(SURVEY$GLOVE, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Glove Size by Birth Sex",
          axis.titles = c('Glove Sizes'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Glove Size is desired 


CrossTable(SURVEY$GLOVE, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  231 
## 
##  
##              | SURVEY$BIRTHSEX 
## SURVEY$GLOVE |         F |         M | Row Total | 
## -------------|-----------|-----------|-----------|
##            5 |         2 |         0 |         2 | 
##              |     1.000 |     0.000 |     0.009 | 
##              |     0.018 |     0.000 |           | 
## -------------|-----------|-----------|-----------|
##          5.5 |         7 |         0 |         7 | 
##              |     1.000 |     0.000 |     0.030 | 
##              |     0.064 |     0.000 |           | 
## -------------|-----------|-----------|-----------|
##            6 |        29 |         0 |        29 | 
##              |     1.000 |     0.000 |     0.126 | 
##              |     0.264 |     0.000 |           | 
## -------------|-----------|-----------|-----------|
##          6.5 |        55 |         6 |        61 | 
##              |     0.902 |     0.098 |     0.264 | 
##              |     0.500 |     0.050 |           | 
## -------------|-----------|-----------|-----------|
##            7 |        15 |        36 |        51 | 
##              |     0.294 |     0.706 |     0.221 | 
##              |     0.136 |     0.298 |           | 
## -------------|-----------|-----------|-----------|
##          7.5 |         2 |        60 |        62 | 
##              |     0.032 |     0.968 |     0.268 | 
##              |     0.018 |     0.496 |           | 
## -------------|-----------|-----------|-----------|
##            8 |         0 |        16 |        16 | 
##              |     0.000 |     1.000 |     0.069 | 
##              |     0.000 |     0.132 |           | 
## -------------|-----------|-----------|-----------|
##          8.5 |         0 |         3 |         3 | 
##              |     0.000 |     1.000 |     0.013 | 
##              |     0.000 |     0.025 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       110 |       121 |       231 | 
##              |     0.476 |     0.524 |           | 
## -------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  159.1027     d.f. =  7     p =  4.954612e-31 
## 
## 
## 
#Median Glove Size - Sex Difference


  SURVEY %>% 
  group_by( BIRTHSEX) %>% 
  summarize( GLOVE_MEDIAN = median(GLOVE, na.rm=T))
## # A tibble: 2 × 2
##   BIRTHSEX GLOVE_MEDIAN
##   <fct>           <dbl>
## 1 F                 6.5
## 2 M                 7.5
ggbetweenstats( data= SURVEY,
                x = BIRTHSEX,
                y = GLOVE,
                type="nonparametric",
                p.adjust.method = "none")

  SURVEY %>% 
      filter( !is.na(GLOVE) ) %>% 
  ggplot(aes( x=GLOVE, y= stat(density), fill= BIRTHSEX))+
  geom_density( alpha=0.5, position = "identity" ) + 
  scale_x_continuous(breaks = scales::breaks_width(0.5) ) +
    scale_y_continuous(breaks = scales::breaks_width(.25))+
    scale_fill_manual(values = c("red","darkgreen"), name ="Birth Sex", labels = c("Female","Male"))+
  xlab("Glove Size")+ ylab("Density")+
  ggtitle("Glove Size Density Curve by Sex",  subtitle = "Shows F/M distributions are almost identical, just off by one full size") +
  theme_538()



Question 6: Distribution of AVG ENDOSCOPY HOURS x BIRTHSEX


#SJPlot cross tabulation with Chi-Square/df
  
plot_xtab(SURVEY$PERFORMANCE_HOURS, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Performance Hours by Birth Sex",
          axis.titles = c('Performance Hour Bands'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Performance Hours is desired 


CrossTable(SURVEY$PERFORMANCE_HOURS, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  235 
## 
##  
##                          | SURVEY$BIRTHSEX 
## SURVEY$PERFORMANCE_HOURS |         F |         M | Row Total | 
## -------------------------|-----------|-----------|-----------|
##                     < 10 |        26 |        30 |        56 | 
##                          |     0.464 |     0.536 |     0.238 | 
##                          |     0.232 |     0.244 |           | 
## -------------------------|-----------|-----------|-----------|
##                    10-20 |        55 |        55 |       110 | 
##                          |     0.500 |     0.500 |     0.468 | 
##                          |     0.491 |     0.447 |           | 
## -------------------------|-----------|-----------|-----------|
##                    21-30 |        20 |        29 |        49 | 
##                          |     0.408 |     0.592 |     0.209 | 
##                          |     0.179 |     0.236 |           | 
## -------------------------|-----------|-----------|-----------|
##                    31-40 |         8 |         5 |        13 | 
##                          |     0.615 |     0.385 |     0.055 | 
##                          |     0.071 |     0.041 |           | 
## -------------------------|-----------|-----------|-----------|
##                     > 40 |         3 |         4 |         7 | 
##                          |     0.429 |     0.571 |     0.030 | 
##                          |     0.027 |     0.033 |           | 
## -------------------------|-----------|-----------|-----------|
##             Column Total |       112 |       123 |       235 | 
##                          |     0.477 |     0.523 |           | 
## -------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  2.264007     d.f. =  4     p =  0.6873295 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.6885774 
## 
## 



Question 7: Distribution of TEACHER SEX PEREFERENCE x BIRTHSEX


#SJPlot cross tabulation with Chi-Square/df
  
plot_xtab(SURVEY$BIRTHSEX, SURVEY$TEACHER_GENDER_PREFERENCE,  margin = "row", 
          bar.pos = "stack", coord.flip = TRUE,
          title = "Teacher Sex Preference by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Teacher Sex Pref?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())



Question 8A & 8B: Distribution of NUMBER OF TEACHERS BY SEX x BIRTHSEX


#SJPlot cross tabulation with Chi-Square/df
  
plot_xtab(SURVEY$FEMALE_TRAINERS, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Number of Female Trainers by Birth Sex",
          axis.titles = c('Approx. Female Trainers'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Number of Female Trainers is desired 

CrossTable(SURVEY$FEMALE_TRAINERS, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  236 
## 
##  
##                        | SURVEY$BIRTHSEX 
## SURVEY$FEMALE_TRAINERS |         F |         M | Row Total | 
## -----------------------|-----------|-----------|-----------|
##                   None |         2 |         2 |         4 | 
##                        |     0.500 |     0.500 |     0.017 | 
##                        |     0.018 |     0.016 |           | 
## -----------------------|-----------|-----------|-----------|
##                    1-2 |        10 |        12 |        22 | 
##                        |     0.455 |     0.545 |     0.093 | 
##                        |     0.088 |     0.098 |           | 
## -----------------------|-----------|-----------|-----------|
##                    3-5 |        49 |        41 |        90 | 
##                        |     0.544 |     0.456 |     0.381 | 
##                        |     0.434 |     0.333 |           | 
## -----------------------|-----------|-----------|-----------|
##                   6-10 |        41 |        47 |        88 | 
##                        |     0.466 |     0.534 |     0.373 | 
##                        |     0.363 |     0.382 |           | 
## -----------------------|-----------|-----------|-----------|
##                   > 10 |        11 |        21 |        32 | 
##                        |     0.344 |     0.656 |     0.136 | 
##                        |     0.097 |     0.171 |           | 
## -----------------------|-----------|-----------|-----------|
##           Column Total |       113 |       123 |       236 | 
##                        |     0.479 |     0.521 |           | 
## -----------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  4.010492     d.f. =  4     p =  0.4045878 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.3982956 
## 
## 
plot_xtab(SURVEY$MALE_TRAINERS, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Number of Male Trainers by Birth Sex",
          axis.titles = c('Approx. Male Trainers'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Number of Male Trainers is desired 

CrossTable(SURVEY$MALE_TRAINERS, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  235 
## 
##  
##                      | SURVEY$BIRTHSEX 
## SURVEY$MALE_TRAINERS |         F |         M | Row Total | 
## ---------------------|-----------|-----------|-----------|
##                  1-2 |         0 |         2 |         2 | 
##                      |     0.000 |     1.000 |     0.009 | 
##                      |     0.000 |     0.016 |           | 
## ---------------------|-----------|-----------|-----------|
##                  3-5 |         8 |        11 |        19 | 
##                      |     0.421 |     0.579 |     0.081 | 
##                      |     0.071 |     0.090 |           | 
## ---------------------|-----------|-----------|-----------|
##                 6-10 |        53 |        58 |       111 | 
##                      |     0.477 |     0.523 |     0.472 | 
##                      |     0.469 |     0.475 |           | 
## ---------------------|-----------|-----------|-----------|
##                 > 10 |        52 |        51 |       103 | 
##                      |     0.505 |     0.495 |     0.438 | 
##                      |     0.460 |     0.418 |           | 
## ---------------------|-----------|-----------|-----------|
##         Column Total |       113 |       122 |       235 | 
##                      |     0.481 |     0.519 |           | 
## ---------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  2.36741     d.f. =  3     p =  0.4997301 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.644784 
## 
## 



Question 10A-10E: Transient Pain Immediately Following Session - Specific Body Part


#SJPlot cross tabulation with Chi-Square/df
  
plot_xtab( SURVEY$BIRTHSEX, SURVEY$EXPERIENCED_TRANSIENT_PAIN_HAND, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Transient Pain in Hand after Procedure by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Hand Pain?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$EXPERIENCED_TRANSIENT_PAIN_NECK_SHOULDER, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Transient Pain in Neck/Shoulder after Procedure by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Neck/Should Pain?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX,SURVEY$EXPERIENCED_TRANSIENT_PAIN_BACK,  margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Transient Pain in Back after Procedure by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Back Pain?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$EXPERIENCED_TRANSIENT_PAIN_LEG, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Transient Pain in Leg after Procedure by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Leg Pain?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$EXPERIENCED_TRANSIENT_PAIN_FOOT,  margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Transient Pain in Foot after Procedure by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Foot Pain?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())



Question 11: Told Injuries were “Growing Pains” x BIRTHSEX

(Exclude all respondents who did not report having been injured)


#SJPlot cross tabulation with Chi-Square/df
  

SUBSET <-  sqldf( "select BIRTHSEX,
                          GROWING_PAINS
                  from SURVEY
                  where GROWING_PAINS != 'NA' ")

SUBSET <- SUBSET %>% 
    mutate(GROWING_PAINS = recode_factor( GROWING_PAINS, "N" = "N",
                                                         "Y" = "Y")) %>% droplevels()

          

plot_xtab(SUBSET$BIRTHSEX, SUBSET$GROWING_PAINS, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Told Injuries were Growing Pains by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Growing Pains?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())



ENDOSCOPIC SUITE ENVIRONMENT

Question 1A & 1B: Distribution of FORMAL TRAINING & INFORMAL TRAINING by BIRTH SEX


plot_xtab(SURVEY$BIRTHSEX, SURVEY$FELLOWSHIP_FORMAL_ERGO_TRAINING, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Formal Ergo Training by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Formal Ergo Traiing?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab( SURVEY$BIRTHSEX, SURVEY$INFORMAL_TRAINING,margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Informal Ergo Training by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Informal Ergo Training?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())



Question 2A-2H: Distribution of TECHNIQUES DISCUSSED DURING TRAINING by BIRTH SEX


plot_xtab(SURVEY$BIRTHSEX, SURVEY$TRAINING_TECHNIQUES_POSTURAL, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Training on Postural Awareness by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Training Postural Awareness?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$TRAINING_TECHNIQUES_BEDHEIGHT, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Training on Bed Height Adjustments by Birth Sex",
          axis.titles = "Birth Sex",  
          legend.title= "Training Bed Height?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$TRAINING_TECHNIQUES_BEDANGLE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Training on Bed Angle Adjustments by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Training Bed Angle?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$TRAINING_TECHNIQUES_MONITORHEIGHT, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Training on Monitor Height Adjustments by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Training Monitor Height?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$TRAINING_TECHNIQUES_MUSCULOSKELETAL, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Training on Musculoskeletal Maneuvers by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Training Musculoskeletal?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$TRAINING_TECHNIQUES_EXERCISE_STRETCHING, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Training on Exercise/Stretching by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Training Exer/Stretch?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab( SURVEY$BIRTHSEX, SURVEY$TRAINING_TECHNIQUES_DIAL_EXTENDERS,margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Training on Dial Extenders by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Training Dial Ext?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$TRAINING_TECHNIQUES_PEDIATRIC_COLONOSCOPE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Training on Pediatric Colonoscopes by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Traiing Pedi Coloscope?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())



Question 3: Distribution of BUDGET ALLOTMENT by BIRTH SEX


plot_xtab(SURVEY$ERGO_TRAINING_BUDGET, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Ergonomic Training Budget by Birth Sex",
          axis.titles = c('Ergonomic Training Budget?'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Ergo Training Budget is desired 


CrossTable(SURVEY$ERGO_TRAINING_BUDGET, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  236 
## 
##  
##                             | SURVEY$BIRTHSEX 
## SURVEY$ERGO_TRAINING_BUDGET |         F |         M | Row Total | 
## ----------------------------|-----------|-----------|-----------|
##                           Y |         2 |         0 |         2 | 
##                             |     1.000 |     0.000 |     0.008 | 
##                             |     0.018 |     0.000 |           | 
## ----------------------------|-----------|-----------|-----------|
##                           N |        30 |        34 |        64 | 
##                             |     0.469 |     0.531 |     0.271 | 
##                             |     0.265 |     0.276 |           | 
## ----------------------------|-----------|-----------|-----------|
##                          DK |        81 |        89 |       170 | 
##                             |     0.476 |     0.524 |     0.720 | 
##                             |     0.717 |     0.724 |           | 
## ----------------------------|-----------|-----------|-----------|
##                Column Total |       113 |       123 |       236 | 
##                             |     0.479 |     0.521 |           | 
## ----------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  2.206704     d.f. =  2     p =  0.3317572 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.5002872 
## 
## 



Question 4A & 4B: Distribution of FEEBACK FREQUENCY & FEEDBACK BY WHOM by BIRTH SEX


plot_xtab(SURVEY$ERGO_FEEDBACK, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Ergo Feedback Frequency by Birth Sex",
          axis.titles = c('How Frequently Ergo Feedback?'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Ergo Feedback Frequency is desired 


CrossTable(SURVEY$ERGO_FEEDBACK, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  236 
## 
##  
##                      | SURVEY$BIRTHSEX 
## SURVEY$ERGO_FEEDBACK |         F |         M | Row Total | 
## ---------------------|-----------|-----------|-----------|
##                Never |         2 |         4 |         6 | 
##                      |     0.333 |     0.667 |     0.025 | 
##                      |     0.018 |     0.033 |           | 
## ---------------------|-----------|-----------|-----------|
##               Rarely |        37 |        46 |        83 | 
##                      |     0.446 |     0.554 |     0.352 | 
##                      |     0.327 |     0.374 |           | 
## ---------------------|-----------|-----------|-----------|
##            Sometimes |        57 |        61 |       118 | 
##                      |     0.483 |     0.517 |     0.500 | 
##                      |     0.504 |     0.496 |           | 
## ---------------------|-----------|-----------|-----------|
##                Often |        17 |        12 |        29 | 
##                      |     0.586 |     0.414 |     0.123 | 
##                      |     0.150 |     0.098 |           | 
## ---------------------|-----------|-----------|-----------|
##         Column Total |       113 |       123 |       236 | 
##                      |     0.479 |     0.521 |           | 
## ---------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  2.22049     d.f. =  3     p =  0.5279237 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.548075 
## 
## 
plot_xtab(SURVEY$ERGO_FEEDBACK_BY_WHOM, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Who Provides Ergo Feedback by Birth Sex",
          axis.titles = c('Who Provides Feedback?'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Ergo Feedback by Whom is desired 


CrossTable(SURVEY$ERGO_FEEDBACK_BY_WHOM, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  236 
## 
##  
##                                 | SURVEY$BIRTHSEX 
##    SURVEY$ERGO_FEEDBACK_BY_WHOM |         F |         M | Row Total | 
## --------------------------------|-----------|-----------|-----------|
## Do not/rarely received feedback |        15 |        22 |        37 | 
##                                 |     0.405 |     0.595 |     0.157 | 
##                                 |     0.133 |     0.179 |           | 
## --------------------------------|-----------|-----------|-----------|
##            Mostly male teachers |        17 |        22 |        39 | 
##                                 |     0.436 |     0.564 |     0.165 | 
##                                 |     0.150 |     0.179 |           | 
## --------------------------------|-----------|-----------|-----------|
##          Mostly female teachers |        20 |        16 |        36 | 
##                                 |     0.556 |     0.444 |     0.153 | 
##                                 |     0.177 |     0.130 |           | 
## --------------------------------|-----------|-----------|-----------|
##                    Both equally |        61 |        63 |       124 | 
##                                 |     0.492 |     0.508 |     0.525 | 
##                                 |     0.540 |     0.512 |           | 
## --------------------------------|-----------|-----------|-----------|
##                    Column Total |       113 |       123 |       236 | 
##                                 |     0.479 |     0.521 |           | 
## --------------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  2.021954     d.f. =  3     p =  0.5678626 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.5778785 
## 
## 



Question 5A - 5G: Distribution of EQUIPMENT OPITIMIZATOIN & AVAILABILITY by BIRTH SEX


plot_xtab( SURVEY$BIRTHSEX, SURVEY$ERGO_OPTIMIZATION, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Ergonomically Optimized Equipment by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Ergo Optimization?",
          geom.colors = c("#006cc5", "lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$GLOVE_SIZE_AVAILABLE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Glove Size Availability by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Glove Size Avail?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$DIAL_EXTENDERS_AVAILABLE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Dial Extender Availability by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Dial Ext Avail?",
          geom.colors = c("#006cc5","lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

SUBSET <-  sqldf( "select BIRTHSEX,
                          DIAL_EXTENDERS_ENCOURAGED
                  from SURVEY
                  where DIAL_EXTENDERS_ENCOURAGED != 'DU' ")

SUBSET <- SUBSET %>% 
    mutate(DIAL_EXTENDERS_ENCOURAGED = recode_factor( DIAL_EXTENDERS_ENCOURAGED, "N" = "N",
                                                         "Y" = "Y")) %>% droplevels()



plot_xtab(SUBSET$BIRTHSEX, SUBSET$DIAL_EXTENDERS_ENCOURAGED, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Dial Extenders Encouraged by Birth Sex - (Includes only subjects who use Dial Extenders)",
          axis.titles = "Birth Sex", 
          legend.title= "Dial Ext Encouraged?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

SUBSET <-  sqldf( "select BIRTHSEX,
                          DIAL_EXTENDERS_FEMALEATT
                  from SURVEY
                  where DIAL_EXTENDERS_FEMALEATT != 'NA' ")

SUBSET <- SUBSET %>% 
    mutate(DIAL_EXTENDERS_FEMALEATT = recode_factor( DIAL_EXTENDERS_FEMALEATT, "N" = "N",
                                                         "Y" = "Y")) %>% droplevels()



plot_xtab(SUBSET$BIRTHSEX, SUBSET$DIAL_EXTENDERS_FEMALEATT, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Dial Extenders Encouraged with Female Att by Birth Sex - (Includes only subjects who use Dial Extenders)",
          axis.titles = "Birth Sex", 
          legend.title= "Dial Ext FemAtt?",
          geom.colors = c("#006cc5","lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

SUBSET <-  sqldf( "select BIRTHSEX,
                          DIAL_EXTENDERS_MALEATT
                  from SURVEY
                  where DIAL_EXTENDERS_MALEATT != 'NA' ")

SUBSET <- SUBSET %>% 
    mutate(DIAL_EXTENDERS_MALEATT = recode_factor( DIAL_EXTENDERS_MALEATT, "N" = "N",
                                                         "Y" = "Y")) %>% droplevels()



plot_xtab(SUBSET$BIRTHSEX, SUBSET$DIAL_EXTENDERS_MALEATT, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Dial Extenders Encouraged with Male Att by Birth Sex - (Includes only subjects who use Dial Extenders)",
          axis.titles = "Birth Sex", 
          legend.title= "Dial Ext MaleAtt?",
          geom.colors = c("#006cc5","lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$PEDI_COLONOSCOPES_AVAILABLE, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Pediatric Colonoscopes by Birth Sex",
          axis.titles = c('Pedi Colonoscopes Available?'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Pedi Colonoscopes is desired 


CrossTable(SURVEY$PEDI_COLONOSCOPES_AVAILABLE, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  235 
## 
##  
##                                    | SURVEY$BIRTHSEX 
## SURVEY$PEDI_COLONOSCOPES_AVAILABLE |         F |         M | Row Total | 
## -----------------------------------|-----------|-----------|-----------|
##                                  Y |       107 |       122 |       229 | 
##                                    |     0.467 |     0.533 |     0.974 | 
##                                    |     0.955 |     0.992 |           | 
## -----------------------------------|-----------|-----------|-----------|
##                                  N |         2 |         0 |         2 | 
##                                    |     1.000 |     0.000 |     0.009 | 
##                                    |     0.018 |     0.000 |           | 
## -----------------------------------|-----------|-----------|-----------|
##                                 DK |         3 |         1 |         4 | 
##                                    |     0.750 |     0.250 |     0.017 | 
##                                    |     0.027 |     0.008 |           | 
## -----------------------------------|-----------|-----------|-----------|
##                       Column Total |       112 |       123 |       235 | 
##                                    |     0.477 |     0.523 |           | 
## -----------------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  3.475254     d.f. =  2     p =  0.1759374 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.1735968 
## 
## 



Question 6A - 6I: Distribution of TYPE OF LEAD APRON AVAILABILITY by BIRTH SEX


plot_xtab(SURVEY$BIRTHSEX, SURVEY$LEAD_APRONS_AVAILABLE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Well-Fitted Lead Aprons Available",
          axis.titles = "Birth Sex", 
          legend.title= "Well-Fitted Lead Aprons?",
          geom.colors = c("#006cc5","lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab( SURVEY$BIRTHSEX, SURVEY$LEAD_APRONS_LW_ONEPIECE,margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Lead Aprons LW One-Piece Available at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Lead Aprons LW 1P?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$LEAD_APRONS_LW_TWOPIECE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Lead Aprons LW Two-Piece Available at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Lead Aprons LW 2P?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$LEAD_APRONS_STANDARD_ONEPIECE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Lead Aprons SW One-Piece Available at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Lead Aprons Std 1P?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$LEAD_APRONS_STANDARD_TWOPIECE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Lead Aprons SW Two-Piece Available at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Lead Aprons Std 2P?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$LEAD_APRONS_DOUBLE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Lead Aprons Double Lead (Maternity) Available at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Lead Aprons Double?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$LEAD_APRONS_THYROID, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Lead Aprons Thyroid Shield Available at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Lead Aprons Thyroid?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$LEAD_APRONS_MATERNALDOS, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Lead Aprons Maternal Dosimeter Available at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Lead Aprons Maternal?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$LEAD_APRONS_FETALDOS, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Lead Aprons Fetal Dosimeter Available at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Lead Aprons Fetal?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())



Question 7A - 7G: Distribution of ERGONOMIC CONSIDERATIONS by BIRTH SEX


plot_xtab(SURVEY$BIRTHSEX, SURVEY$ERGO_FORMAL_TIMEOUT_PRIOR, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Formal Ergo Timeouts at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Formal Ergo Timeout?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

CrossTable(SURVEY$ERGO_FORMAL_TIMEOUT_PRIOR, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T) 
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  234 
## 
##  
##                                  | SURVEY$BIRTHSEX 
## SURVEY$ERGO_FORMAL_TIMEOUT_PRIOR |         F |         M | Row Total | 
## ---------------------------------|-----------|-----------|-----------|
##                                N |       104 |       115 |       219 | 
##                                  |     0.475 |     0.525 |     0.936 | 
##                                  |     0.929 |     0.943 |           | 
## ---------------------------------|-----------|-----------|-----------|
##                                Y |         8 |         7 |        15 | 
##                                  |     0.533 |     0.467 |     0.064 | 
##                                  |     0.071 |     0.057 |           | 
## ---------------------------------|-----------|-----------|-----------|
##                     Column Total |       112 |       122 |       234 | 
##                                  |     0.479 |     0.521 |           | 
## ---------------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  0.1921786     d.f. =  1     p =  0.6611095 
## 
## Pearson's Chi-squared test with Yates' continuity correction 
## ------------------------------------------------------------
## Chi^2 =  0.02932413     d.f. =  1     p =  0.8640328 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Sample estimate odds ratio:  0.7921006 
## 
## Alternative hypothesis: true odds ratio is not equal to 1
## p =  0.7911129 
## 95% confidence interval:  0.2356057 2.596761 
## 
## Alternative hypothesis: true odds ratio is less than 1
## p =  0.431063 
## 95% confidence interval:  0 2.184852 
## 
## Alternative hypothesis: true odds ratio is greater than 1
## p =  0.75966 
## 95% confidence interval:  0.2822357 Inf 
## 
## 
## 
plot_xtab(SURVEY$BIRTHSEX, SURVEY$ERGO_INFORMAL_TIMEOUT_PRIOR, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Informal Ergo Timeouts at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Informal Ergo Timeout?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab( SURVEY$BIRTHSEX, SURVEY$MONITORS_ADJUSTABLE,margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Monitors Easily Adjustable at Institution by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Monitors Easily Adjust?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$TEACHER_SENSITIVITY_STATURE_HANDSIZE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Teachers Train with Sensitivity to Stature/Hand Size by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Teacher Sensitivity to Stature?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$TEACHER_SENSITIVITY_BY_GENDER, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Sex of Sensitive Teachers by Birth Sex",
          axis.titles = c('Male or Female Teachers More Sensitive?'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Male or Female Teacher more Sensitive is desired 

CrossTable(SURVEY$TEACHER_SENSITIVITY_BY_GENDER, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  234 
## 
##  
##                                      | SURVEY$BIRTHSEX 
## SURVEY$TEACHER_SENSITIVITY_BY_GENDER |         F |         M | Row Total | 
## -------------------------------------|-----------|-----------|-----------|
##                                 Male |         8 |         8 |        16 | 
##                                      |     0.500 |     0.500 |     0.068 | 
##                                      |     0.071 |     0.066 |           | 
## -------------------------------------|-----------|-----------|-----------|
##                               Female |        23 |        10 |        33 | 
##                                      |     0.697 |     0.303 |     0.141 | 
##                                      |     0.205 |     0.082 |           | 
## -------------------------------------|-----------|-----------|-----------|
##                         Both Equally |        55 |        72 |       127 | 
##                                      |     0.433 |     0.567 |     0.543 | 
##                                      |     0.491 |     0.590 |           | 
## -------------------------------------|-----------|-----------|-----------|
##             Never had female teacher |         3 |         2 |         5 | 
##                                      |     0.600 |     0.400 |     0.021 | 
##                                      |     0.027 |     0.016 |           | 
## -------------------------------------|-----------|-----------|-----------|
##                             Not Sure |        23 |        30 |        53 | 
##                                      |     0.434 |     0.566 |     0.226 | 
##                                      |     0.205 |     0.246 |           | 
## -------------------------------------|-----------|-----------|-----------|
##                         Column Total |       112 |       122 |       234 | 
##                                      |     0.479 |     0.521 |           | 
## -------------------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  8.108789     d.f. =  4     p =  0.08767341 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.07838717 
## 
## 
plot_xtab(SURVEY$BIRTHSEX, SURVEY$TACTILE_INSTRUCTION_FROM_MALES, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Tactile Instruction from Male Teachers by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Tactile Instruction from Males?",
          geom.colors = c("#006cc5","lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$TACTILE_INSTRUCTION_FROM_FEMALES, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Tactile Instruction from Females Teachers by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Tactile Instruction from Females?",
          geom.colors = c("#006cc5","lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())



Question 8A - 8I: Distribution of RESPECT-ORIENTED ANSWERS by BIRTH SEX


plot_xtab(SURVEY$BIRTHSEX, SURVEY$COMFORTABLE_ASKING_NURSES,  margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Comfortable Asking Nurses for Help by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Comfortable Asking Nurses?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

SUBSET <-  sqldf( "select BIRTHSEX,
                          COMFORTABLE_ASKING_TECHS
                  from SURVEY
                  where COMFORTABLE_ASKING_TECHS != 'NA' ")

SUBSET <- SUBSET %>% 
    mutate(COMFORTABLE_ASKING_TECHS = recode_factor( COMFORTABLE_ASKING_TECHS, "N" = "N",
                                                         "Y" = "Y")) %>% droplevels()



plot_xtab(SUBSET$BIRTHSEX, SUBSET$COMFORTABLE_ASKING_TECHS, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Comfortable Asking Techs for Help by Birth Sex (Includes only respondents with Techs)",
          axis.titles = "Birth Sex", 
          legend.title= "Comfortable Asking Techs?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab( SURVEY$BIRTHSEX, SURVEY$NURSES_ASKING, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Times Asking Nurses for Help by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Times Asking Nurses?",
          geom.colors = c("#006cc5", "lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$MALE_ATTENDINGS_ASKING, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Times Male Attending Asking Nurses for Help by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Times Asking MaleAtt?",
          geom.colors = c("#006cc5", "lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

CrossTable(SURVEY$MALE_ATTENDINGS_ASKING, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  229 
## 
##  
##                               | SURVEY$BIRTHSEX 
## SURVEY$MALE_ATTENDINGS_ASKING |         F |         M | Row Total | 
## ------------------------------|-----------|-----------|-----------|
##                          Once |        49 |        63 |       112 | 
##                               |     0.438 |     0.562 |     0.489 | 
##                               |     0.454 |     0.521 |           | 
## ------------------------------|-----------|-----------|-----------|
##                         Twice |        18 |        26 |        44 | 
##                               |     0.409 |     0.591 |     0.192 | 
##                               |     0.167 |     0.215 |           | 
## ------------------------------|-----------|-----------|-----------|
##               More than twice |        41 |        32 |        73 | 
##                               |     0.562 |     0.438 |     0.319 | 
##                               |     0.380 |     0.264 |           | 
## ------------------------------|-----------|-----------|-----------|
##                  Column Total |       108 |       121 |       229 | 
##                               |     0.472 |     0.528 |           | 
## ------------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  3.587705     d.f. =  2     p =  0.1663182 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.1745347 
## 
## 
plot_xtab(SURVEY$FEMALE_ATTENDINGS_ASKING, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Times Female Attending Asking Nurses for Help by Birth Sex",
          axis.titles = c('Times Female Att Asking Nurses?'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

CrossTable(SURVEY$FEMALE_ATTENDINGS_ASKING, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  229 
## 
##  
##                                 | SURVEY$BIRTHSEX 
## SURVEY$FEMALE_ATTENDINGS_ASKING |         F |         M | Row Total | 
## --------------------------------|-----------|-----------|-----------|
##                            Once |        42 |        51 |        93 | 
##                                 |     0.452 |     0.548 |     0.406 | 
##                                 |     0.389 |     0.421 |           | 
## --------------------------------|-----------|-----------|-----------|
##                           Twice |        25 |        32 |        57 | 
##                                 |     0.439 |     0.561 |     0.249 | 
##                                 |     0.231 |     0.264 |           | 
## --------------------------------|-----------|-----------|-----------|
##                 More than Twice |        37 |        36 |        73 | 
##                                 |     0.507 |     0.493 |     0.319 | 
##                                 |     0.343 |     0.298 |           | 
## --------------------------------|-----------|-----------|-----------|
##          Don't work with FemAtt |         4 |         2 |         6 | 
##                                 |     0.667 |     0.333 |     0.026 | 
##                                 |     0.037 |     0.017 |           | 
## --------------------------------|-----------|-----------|-----------|
##                    Column Total |       108 |       121 |       229 | 
##                                 |     0.472 |     0.528 |           | 
## --------------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  1.6784     d.f. =  3     p =  0.6417466 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.6522275 
## 
## 
plot_xtab(SURVEY$BIRTHSEX, SURVEY$RECOGNIZED_RESPECTED_ES_STAFF, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Recognized/Respected by ES Staff by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Recog by ES Staff?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$RECOGNIZED_RESPECTED_ANESTHETISTS, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Recognized/Respected by Anesthetists by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Recog by Anesthetists?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab( SURVEY$BIRTHSEX, SURVEY$RECOGNIZED_RESPECTED_GASTRO_ATTENDING,margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Recognized/Respected by Gastro Attending by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Recog by Gastro Att?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

plot_xtab(SURVEY$BIRTHSEX, SURVEY$FIRST_NAME_NO_PERMISSION,  margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "First Name Used No Permission by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "First Name No Permission?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Is there a RACE difference on this question ?


plot_xtab(SURVEY$RACE2, SURVEY$FIRST_NAME_NO_PERMISSION, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "First Name Used No Permission by Race (Broad)",
          axis.titles = "Binary Race Category", 
          legend.title= "First Name Used No Permission?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Is there a TRAINING LEVEL difference on this question ?


plot_xtab(SURVEY$TRAINING_LEVEL, SURVEY$FIRST_NAME_NO_PERMISSION, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "First Name Used No Permission by Training Level",
          axis.titles = "Binary Race Category", 
          legend.title= "First Name Used No Permission?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Is there a HEIGHT BAND difference on this question ?


plot_xtab(SURVEY$HEIGHT2, SURVEY$FIRST_NAME_NO_PERMISSION, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "First Name Used No Permission by Height Band",
          axis.titles = "Height Band", 
          legend.title= "First Name Used No Permission?",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())



FUTURE DESIRES FOR ENDOSCOPY SUITE

Question 1: Distribution of MANDATORY ERGO TRAINING by BIRTH SEX


plot_xtab(SURVEY$ERGO_TRAINING_MANDATORY, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Should Ergo Training be Mandatory by Birth Sex",
          axis.titles = c('Mandatory Ergo Training?'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Ergo Training be Mandatory is desired 

CrossTable(SURVEY$ERGO_TRAINING_MANDATORY, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T , prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  234 
## 
##  
##                                | SURVEY$BIRTHSEX 
## SURVEY$ERGO_TRAINING_MANDATORY |         F |         M | Row Total | 
## -------------------------------|-----------|-----------|-----------|
##                              Y |       112 |       116 |       228 | 
##                                |     0.491 |     0.509 |     0.974 | 
##                                |     0.991 |     0.959 |           | 
## -------------------------------|-----------|-----------|-----------|
##                              N |         0 |         2 |         2 | 
##                                |     0.000 |     1.000 |     0.009 | 
##                                |     0.000 |     0.017 |           | 
## -------------------------------|-----------|-----------|-----------|
##                             DK |         1 |         3 |         4 | 
##                                |     0.250 |     0.750 |     0.017 | 
##                                |     0.009 |     0.025 |           | 
## -------------------------------|-----------|-----------|-----------|
##                   Column Total |       113 |       121 |       234 | 
##                                |     0.483 |     0.517 |           | 
## -------------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  2.799944     d.f. =  2     p =  0.2466039 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.3718787 
## 
## 

Question 2: Distribution of ERGO BUDGET FOR OPTIMIZATION by BIRTH SEX


plot_xtab(SURVEY$ERGO_OPTIMIZAITON_BUDGET_REQUIRED, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Should Budget Include Ergo Optimiation by Birth Sex",
          axis.titles = c('Budget Should Include Ergo Opti?'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Ergo Optimization Budget be Mandatory is desired 

CrossTable(SURVEY$ERGO_OPTIMIZAITON_BUDGET_REQUIRED, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  233 
## 
##  
##                                          | SURVEY$BIRTHSEX 
## SURVEY$ERGO_OPTIMIZAITON_BUDGET_REQUIRED |         F |         M | Row Total | 
## -----------------------------------------|-----------|-----------|-----------|
##                                        Y |       109 |       106 |       215 | 
##                                          |     0.507 |     0.493 |     0.923 | 
##                                          |     0.965 |     0.883 |           | 
## -----------------------------------------|-----------|-----------|-----------|
##                                        N |         0 |         3 |         3 | 
##                                          |     0.000 |     1.000 |     0.013 | 
##                                          |     0.000 |     0.025 |           | 
## -----------------------------------------|-----------|-----------|-----------|
##                                       DK |         4 |        11 |        15 | 
##                                          |     0.267 |     0.733 |     0.064 | 
##                                          |     0.035 |     0.092 |           | 
## -----------------------------------------|-----------|-----------|-----------|
##                             Column Total |       113 |       120 |       233 | 
##                                          |     0.485 |     0.515 |           | 
## -----------------------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  6.103736     d.f. =  2     p =  0.04727055 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.04412296 
## 
## 

Question 3: Distribution of INCREASED AVAILABILITY OF DIAL EXTENDERS by BIRTH SEX


plot_xtab(SURVEY$BIRTHSEX, SURVEY$EXPERIENCE_IMPROVED_DIAL_EXTENDERS, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Increased Availability of Dial Extenders by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Increase Avail Dial Ext?",
          geom.colors = c("#006cc5","lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())


Question 4: Distribution of INCREASED AVAILABILITY OF PEDI COLONSCOPES by BIRTH SEX


plot_xtab(SURVEY$BIRTHSEX, SURVEY$EXPERIENCE_IMPROVED_PEDI_COLONOSCOPES, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Increased Availability of Pedi Colonoscopes by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Increase Avail Pediscopes?",
          geom.colors = c("#006cc5","lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())


Question 5: Distribution of INCREASED AVAILABILITY OF LEAD APRONS by BIRTH SEX


plot_xtab(SURVEY$BIRTHSEX, SURVEY$EXPERIENCE_IMPROVED_APRONS, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Increased Availability of Lead Aprons by Birth Sex",
          axis.titles = "Birth Sex", 
          legend.title= "Improve Aprons?",
          geom.colors = c("#006cc5","lightblue", "#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())


Question 6: Distribution of ERGO TRAINING REQUIRED FOR TEACHERS by BIRTH SEX


plot_xtab(SURVEY$ENDO_TEACHERS_FORMAL_TRAINING_REQUIRED, SURVEY$BIRTHSEX, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Formal Ergo Training Required for Teachers by Birth Sex",
          axis.titles = c('Ergo Training Required for Teachers?'), 
          legend.title= "Birth Sex",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

#Alternative View, if Birth Sex by Formal Teacher Training Mandatory is desired 

CrossTable(SURVEY$ENDO_TEACHERS_FORMAL_TRAINING_REQUIRED, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=T, prop.t=F, chisq=T, fisher=T)  #rows then/over columns
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Row Total |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  234 
## 
##  
##                                               | SURVEY$BIRTHSEX 
## SURVEY$ENDO_TEACHERS_FORMAL_TRAINING_REQUIRED |         F |         M | Row Total | 
## ----------------------------------------------|-----------|-----------|-----------|
##                                             Y |       103 |       105 |       208 | 
##                                               |     0.495 |     0.505 |     0.889 | 
##                                               |     0.912 |     0.868 |           | 
## ----------------------------------------------|-----------|-----------|-----------|
##                                             N |         1 |         4 |         5 | 
##                                               |     0.200 |     0.800 |     0.021 | 
##                                               |     0.009 |     0.033 |           | 
## ----------------------------------------------|-----------|-----------|-----------|
##                                            DK |         9 |        12 |        21 | 
##                                               |     0.429 |     0.571 |     0.090 | 
##                                               |     0.080 |     0.099 |           | 
## ----------------------------------------------|-----------|-----------|-----------|
##                                  Column Total |       113 |       121 |       234 | 
##                                               |     0.483 |     0.517 |           | 
## ----------------------------------------------|-----------|-----------|-----------|
## 
##  
## Statistics for All Table Factors
## 
## 
## Pearson's Chi-squared test 
## ------------------------------------------------------------
## Chi^2 =  1.976608     d.f. =  2     p =  0.3722074 
## 
## 
##  
## Fisher's Exact Test for Count Data
## ------------------------------------------------------------
## Alternative hypothesis: two.sided
## p =  0.4698464 
## 
## 


ERGONOMIC KNOWLEDGE

Question 1: Importance of Ergonomics in Relation to ERI


plot_xtab(SURVEY$BIRTHSEX, SURVEY$ERGONOMIC_IMPORTANCE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Importance of Ergonomics in Relation to ERI",
          axis.titles = "Birth Sex", 
          legend.title= "Ergo Importance Response",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())


Question 2: Mitigation Stategies to Reduce Muscle Strain Risk


plot_xtab(SURVEY$BIRTHSEX, SURVEY$MITIGATING_MUSCLE_STRAIN, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Mitigation Strategies to Reduce Muscle Strain Risk",
          axis.titles = "Birth Sex", 
          legend.title= "Muscle Strain Response",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())


Question 3: Mitigation Stategies to Reduce Muscle Strain Risk


plot_xtab(SURVEY$BIRTHSEX, SURVEY$BED_POSITION, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Bed Position in Relation to the Elbow",
          axis.titles = "Birth Sex", 
          legend.title= "Bed Position Response",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())


Question 4: Best Position for Endoscopy Trainer


plot_xtab(SURVEY$BIRTHSEX, SURVEY$ENDO_TRAINER_POSITION, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Best Position for Endoscopy Trainer",
          axis.titles = "Birth Sex", 
          legend.title= "Trainer Position Response",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())


Question 5: Best Time to Explore Disability Insurance


plot_xtab(SURVEY$BIRTHSEX, SURVEY$WHEN_DISABILITY_INSURANCE, margin = "row", 
          bar.pos = "stack", coord.flip = TRUE, 
          title = "Best Time to Explore Disability Insurance",
          axis.titles = "Birth Sex", 
          legend.title= "When Disab Ins Response",
          geom.colors = c("#006cc5","#cbcccb"), 
          show.summary = TRUE )+ 
  set_theme(base= theme_classic())

# CORRECT <-
#   
#   SURVEY %>% 
#   mutate(CORRECT = across(.cols =  ERGONOMIC_IMPORTANCE: WHEN_DISABILITY_INSURANCE , .fns = str_count, "Correct")) %>%
#           rowwise() %>%
#           mutate(COUNT_CORRECT = across(.cols = contains("Correct"), .fns = sum)) %>% 
#           select (BIRTHSEX, ERGONOMIC_IMPORTANCE: WHEN_DISABILITY_INSURANCE, COUNT_CORRECT) %>% 
#           mutate( COUNT_CORRECT = as.integer(COUNT_CORRECT) )


# CORRECT <-
#    SURVEY %>%
#     select (BIRTHSEX, ERGONOMIC_IMPORTANCE: WHEN_DISABILITY_INSURANCE,) %>%
#     mutate( COUNT_CORRECT = apply( . , 1, function(x) sum( x== "Correct", na.rm= T )))



CORRECT <-
   SURVEY %>% 
    select (RECORD_ID, BIRTHSEX, ERGONOMIC_IMPORTANCE: WHEN_DISABILITY_INSURANCE,) %>% 
    mutate( COUNT_CORRECT = rowSums( . == "Correct" ))


eov.ttest(CORRECT, COUNT_CORRECT, BIRTHSEX)  
## [1] "F Test p.value = 0.4348913  EOV = TRUE (Pooled)"
## [1] "CORRECT : COUNT_CORRECT ~ BIRTHSEX"
## 
##  Two Sample t-test
## 
## data:  CORRECT : COUNT_CORRECT ~ BIRTHSEX
## t = 1.2623, df = 234, p-value = 0.2081
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  -0.1096219  0.5005853
## sample estimates:
## mean in group F mean in group M 
##        3.504425        3.308943
ggbetweenstats( data= CORRECT,
                x = BIRTHSEX,
                y = COUNT_CORRECT,
                type="parametric",
                p.adjust.method = "none",
                title = "Mean Scores by Birth Sex")



QUICK CORRELATIONS & ASSOCIATIONS


A WARNING ABOUT THE FOLLOWING CORRELATIONAL DATA


The appropriate tack here would be to run a series of “point biserial correlations,” which are correlations between a categorical data (structured as T/F, 0/1, or ordinal, such as Low-Medium-High or Freshman-Sophomore-Junior-Senior) against a continuous variable. In our data, the only continuous variable is GLOVE (glove size), so therefore, only the correlations where our demographic categorical data is compared to GLOVE (e.g., BIRTHSEX, AGEBAND, TRNG_LEVEL, HEIGHT_BAND) are statistically valid. For other categorical variables that we have coerced into a binary numeric – EVER_INJURED, TRANSIENT_PAIN_XXXX, ERGO_OPTIM – any correlations against other categorical variables would technically be statistically inappropriate to run, and should certainly never be reported. For our purposes here, these “correlations” should be used only to guide us to perform other, more appropriate statistical tests (such as logistic regression, which is a natural extension of the Chi-Square analyses run previously).



Task 1: Produce a CORRELATION MATRIX that displays the high-level associations among Sex, Race, Age, Height and Glove Size with other key variables of interest

CORRMATRIX <-

  SURVEY %>%
  select (BIRTHSEX, RACE2, AGE2, TRAINING_LEVEL, PERFORMANCE_HOURS, HEIGHT2, GLOVE, EVER_INJURED, ERGO_OPTIMIZATION, TEACHER_SENSITIVITY_STATURE_HANDSIZE,
          FIRST_NAME_NO_PERMISSION, EXPERIENCED_TRANSIENT_PAIN_HAND : EXPERIENCED_TRANSIENT_PAIN_FOOT ) %>% 
  
  # na.omit()  %>% 
  
  mutate( SEX = case_when(  is.na(BIRTHSEX) ~ NA_real_,
                              BIRTHSEX == "M" ~ 0,
                                 TRUE ~ 1 ) ,
          
          RACE = case_when( is.na(RACE2) ~ NA_real_,
                            RACE2 == "WHITE" ~ 0,
                            TRUE  ~ 1),
          
          AGECAT = case_when( is.na(AGE2) ~ NA_real_,
                              AGE2 == "< 30" ~ 1,
                              AGE2 == "30-34" ~ 2,
                              AGE2 == "35-40" ~3,
                              TRUE ~ 4),
          
          TRNG_LEVEL = case_when( is.na(TRAINING_LEVEL) ~ NA_real_,
                                  TRAINING_LEVEL == 'First Year' ~ 1,
                                  TRAINING_LEVEL == 'Second Year' ~ 2,
                                  TRAINING_LEVEL == 'Third Year' ~ 3,
                                  TRUE  ~ 4) ,
          
          PERF_HRS = case_when( is.na(PERFORMANCE_HOURS) ~ NA_real_,
                          PERFORMANCE_HOURS == '< 10' ~ 1,
                          PERFORMANCE_HOURS == '10-20' ~ 2,
                          TRUE  ~ 3) ,
          
          HEIGHT = case_when( is.na(HEIGHT2) ~ NA_real_,
                              HEIGHT2 == "< 5" ~ 1,
                              HEIGHT2 == "5-5'3" ~ 2,
                              HEIGHT2 == "5'4-5'6" ~ 3,
                              HEIGHT2 == "5'7-5'9" ~ 4,
                              HEIGHT2 == "5'10-6'" ~ 5,
                              HEIGHT2 == "6'1-6'4" ~ 6,
                                         TRUE ~ 7),
          
          INJURY = case_when(  is.na(EVER_INJURED) ~ NA_real_,
                               EVER_INJURED== "Y" ~ 1,
                                    TRUE ~ 0)  ,
          
          TPAIN =  case_when(   is.na(EXPERIENCED_TRANSIENT_PAIN_BACK) |
                                is.na(EXPERIENCED_TRANSIENT_PAIN_HAND) |
                                is.na(EXPERIENCED_TRANSIENT_PAIN_FOOT) |
                                is.na(EXPERIENCED_TRANSIENT_PAIN_HAND) |
                                is.na(EXPERIENCED_TRANSIENT_PAIN_LEG) |
                                is.na(EXPERIENCED_TRANSIENT_PAIN_NECK_SHOULDER) ~ NA_real_,
                                  
                                EXPERIENCED_TRANSIENT_PAIN_BACK == "Y" |
                                EXPERIENCED_TRANSIENT_PAIN_HAND == "Y" |
                                EXPERIENCED_TRANSIENT_PAIN_FOOT == "Y" |
                                EXPERIENCED_TRANSIENT_PAIN_HAND == "Y" |
                                EXPERIENCED_TRANSIENT_PAIN_LEG == "Y" |
                                EXPERIENCED_TRANSIENT_PAIN_NECK_SHOULDER == "Y" ~ 1,
                                TRUE ~ 0),
          
          
          ERGO_OPTIM =  case_when( is.na(ERGO_OPTIMIZATION) ~ NA_real_,
                                  ERGO_OPTIMIZATION == "Y" ~ 1,
                                    TRUE ~ 0),
                                   
          SENSITIVE_TEACHER = case_when (is.na(TEACHER_SENSITIVITY_STATURE_HANDSIZE) ~ NA_real_,
                                         TEACHER_SENSITIVITY_STATURE_HANDSIZE == "Y" ~ 1,
                                          TRUE ~ 0),
          
          FNAME_NP =  case_when (is.na(FIRST_NAME_NO_PERMISSION) ~ NA_real_,
                                 FIRST_NAME_NO_PERMISSION == "Y" ~ 1,
                                 TRUE ~ 0)) %>% 
  
  select( SEX, RACE, AGECAT, TRNG_LEVEL, PERF_HRS, HEIGHT, GLOVE, INJURY, TPAIN, ERGO_OPTIM, SENSITIVE_TEACHER, FNAME_NP)




pairs.panels( CORRMATRIX, pch=21, stars=T,) 


There does not appear to be any association in this sample with HEIGHT or GLOVE and having sustained an injury or experienced any of the five specific transient pain types listed in the survey. Does this change if we adjust the correlations based on sex? Or race?


require(ppcor)
## Loading required package: ppcor
#Partial Correlation:  HEIGHT + INJURY correlation adjusted for SEX and then RACE

CORRMATRIX_NOMISS <- CORRMATRIX %>% na.omit()

attach(CORRMATRIX_NOMISS)

pcor.test( x=HEIGHT, y=INJURY, z=SEX, method= 'pearson')
##     estimate   p.value statistic   n gp  Method
## 1 0.03413686 0.6121163 0.5077768 224  1 pearson
pcor.test( x=HEIGHT, y=INJURY, z=RACE, method= 'pearson' )
##     estimate   p.value statistic   n gp  Method
## 1 0.04304191 0.5225387 0.6404575 224  1 pearson
#Partial Correlation:  GLOVE + INJURY correlation adjusted for SEX and then RACE

pcor.test( x=GLOVE, y=INJURY, z=SEX, method= 'pearson' )
##     estimate   p.value  statistic   n gp  Method
## 1 -0.0248085 0.7125418 -0.3689184 224  1 pearson
pcor.test( x=GLOVE, y=INJURY, z=RACE, method= 'pearson' )
##      estimate   p.value  statistic   n gp  Method
## 1 0.003767121 0.9553903 0.05600268 224  1 pearson
detach(CORRMATRIX_NOMISS)


Task 2: Are Ergonomic Difficulties reported among respondents who were PREGNANT during their fellowship associated at all with GLOVE or HEIGHT ?

  • Total N for subjects who were pregnant during fellowship = 23
  • Only one (1) subject reported an Ergonomic Injury during her fellowship, so meaningful analysis along this metric could not be performed


#Ergonomic Difficulty during Pregnancy?

CrossTable(SURVEY$PREGNANCY_ERGO_DIFFICULTY, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=F, prop.t=F, chisq=F, fisher=F)  
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  23 
## 
##  
##                                  | SURVEY$BIRTHSEX 
## SURVEY$PREGNANCY_ERGO_DIFFICULTY |         F |         M | Row Total | 
## ---------------------------------|-----------|-----------|-----------|
##                                N |         9 |         0 |         9 | 
##                                  |     0.391 |       NaN |           | 
## ---------------------------------|-----------|-----------|-----------|
##                                Y |        14 |         0 |        14 | 
##                                  |     0.609 |       NaN |           | 
## ---------------------------------|-----------|-----------|-----------|
##                     Column Total |        23 |         0 |        23 | 
##                                  |     1.000 |     0.000 |           | 
## ---------------------------------|-----------|-----------|-----------|
## 
## 
CrossTable(SURVEY$PREGNANCY_ERGO_INJURY, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=F, prop.t=F, chisq=F, fisher=F) 
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  22 
## 
##  
##                              | SURVEY$BIRTHSEX 
## SURVEY$PREGNANCY_ERGO_INJURY |         F |         M | Row Total | 
## -----------------------------|-----------|-----------|-----------|
##                            N |        21 |         0 |        21 | 
##                              |     0.955 |       NaN |           | 
## -----------------------------|-----------|-----------|-----------|
##                            Y |         1 |         0 |         1 | 
##                              |     0.045 |       NaN |           | 
## -----------------------------|-----------|-----------|-----------|
##                 Column Total |        22 |         0 |        22 | 
##                              |     1.000 |     0.000 |           | 
## -----------------------------|-----------|-----------|-----------|
## 
## 
CrossTable(SURVEY$POSTPARTUM_ERGO_INJURY, SURVEY$BIRTHSEX, prop.chisq=F, prop.c=T, prop.r=F, prop.t=F, chisq=F, fisher=F) 
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## |           N / Col Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  20 
## 
##  
##                               | SURVEY$BIRTHSEX 
## SURVEY$POSTPARTUM_ERGO_INJURY |         F |         M | Row Total | 
## ------------------------------|-----------|-----------|-----------|
##                             N |        19 |         0 |        19 | 
##                               |     0.950 |       NaN |           | 
## ------------------------------|-----------|-----------|-----------|
##                             Y |         1 |         0 |         1 | 
##                               |     0.050 |       NaN |           | 
## ------------------------------|-----------|-----------|-----------|
##                  Column Total |        20 |         0 |        20 | 
##                               |     1.000 |     0.000 |           | 
## ------------------------------|-----------|-----------|-----------|
## 
## 
#Correlation Matrix to view "associations" with Glove Size and Height


PREGNANCY  <-
  
  SURVEY %>%
  filter( FELLOWSHIP_PREGNANCY == "Y") %>% 
  select (BIRTHSEX, RACE2, AGE2, TRAINING_LEVEL, HEIGHT2, GLOVE, PREGNANCY_ERGO_DIFFICULTY ) %>% 
  
  mutate( SEX = case_when(  is.na(BIRTHSEX) ~ NA_real_,
                            BIRTHSEX == "M" ~ 0,
                            TRUE ~ 1 ) ,
          
          RACE = case_when( is.na(RACE2) ~ NA_real_,
                            RACE2 == "WHITE" ~ 0,
                            TRUE  ~ 1),
          
          AGECAT = case_when( is.na(AGE2) ~ NA_real_,
                              AGE2 == "< 30" ~ 1,
                              AGE2 == "30-34" ~ 2,
                              AGE2 == "35-40" ~3,
                              TRUE ~ 4),
          
          TRNG_LEVEL = case_when( is.na(TRAINING_LEVEL) ~ NA_real_,
                                  TRAINING_LEVEL == 'First Year' ~ 1,
                                  TRAINING_LEVEL == 'Second Year' ~ 2,
                                  TRAINING_LEVEL == 'Third Year' ~ 3,
                                  TRUE  ~ 4) ,
          
          HEIGHT = case_when( is.na(HEIGHT2) ~ NA_real_,
                              HEIGHT2 == "< 5" ~ 1,
                              HEIGHT2 == "5-5'3" ~ 2,
                              HEIGHT2 == "5'4-5'6" ~ 3,
                              HEIGHT2 == "5'7-5'9" ~ 4,
                              HEIGHT2 == "5'10-6'" ~ 5,
                              HEIGHT2 == "6'1-6'4" ~ 6,
                              TRUE ~ 7),
          
          ERGO_DIFF = case_when( is.na(PREGNANCY_ERGO_DIFFICULTY) ~ NA_real_,
                                 PREGNANCY_ERGO_DIFFICULTY == 'N' ~ 0,
                                  TRUE  ~ 1) ) %>% 
  select( SEX, RACE, AGECAT, TRNG_LEVEL, ERGO_DIFF, GLOVE, HEIGHT)


pairs.panels( PREGNANCY[c(-1,-2)] , pch=21, stars=T) 


Task 3: How are the associations/correlations for certain variables affected if we statify them by Training Level?

  • Ergonomic Optimizaiton
  • Injury
  • Transient Pain
  • Sensitive Teachers
  • First Name No Permission


require(ggcorrplot)
## Loading required package: ggcorrplot
#Paneled Correlation Matrices for each level of "Training Level" (4)


CORRMATRIX %>%
  mutate(TRNG_LEVEL = factor(TRNG_LEVEL),
         TRNG_LEVEL = recode_factor(TRNG_LEVEL, '1'= 'First Yr', '2'= "Second Yr", '3'= "Third Yr", '4'="Advanced", .ordered=T)) %>%
  select(TRNG_LEVEL, ERGO_OPTIM, INJURY, TPAIN, SENSITIVE_TEACHER, FNAME_NP) %>%
  
  grouped_ggcorrmat(
    ## arguments relevant for `ggcorrmat`
    data = . ,
    
    grouping.var = TRNG_LEVEL,
    ## arguments relevant for `combine_plots`
    plotgrid.args = list(nrow = 2),
    annotation.args = list(
      tag_levels = "1",
      title = "Relationship among Key Variables across Training Level"
    ),
    p.adjust.method = "none"
  )

There apprears to be a few slightly significant “correlations” that might be worth exploring with simple logistic regression:
  • First Year: Transient Pain with Sensitive Teacher & wth Ergo Optimization (based on borderline p-value, not displayed)
  • Second Year: First Name & Ergo Optimization (???)
  • Advanced: Transient Pain & Sensitive Teacher


Let’s explore further with few unadjusted/adjusted regressions where Transient Pain is the outcome and Ergo Optimization is the outcome:



  • 1. Does Ergonomic Optimization influence reports of Transient Pain, with and without adjusting for Training Level?


require(car)

TLEVEL <-
CORRMATRIX %>%
  mutate(TRNG_LEVEL = factor(TRNG_LEVEL),
         TRNG_LEVEL = recode_factor(TRNG_LEVEL, '1'= 'First Yr', '2'= "Second Yr", '3'= "Third Yr", '4'="Advanced", .ordered=F),
         TRNG_LEVEL =  relevel(TRNG_LEVEL, ref= "Advanced")) %>%
  select(TRNG_LEVEL, ERGO_OPTIM, INJURY, TPAIN, SENSITIVE_TEACHER, FNAME_NP)
  

      tpain.eo.glm.unadj <- glm( TPAIN ~ ERGO_OPTIM, data= TLEVEL, family= binomial(link="logit"))
      tpain.eo.emmeans.unadj <- emmeans( tpain.eo.glm.unadj,  ~ c(ERGO_OPTIM), type= "response")
      tpain.eo.glm.adj   <- glm( TPAIN ~ ERGO_OPTIM + TRNG_LEVEL, data= TLEVEL, family= binomial(link="logit"))
      tpain.eo.emmeans.adj <- emmeans( tpain.eo.glm.adj,  ~ c(ERGO_OPTIM), type= "response")
      
      
      summary(tpain.eo.glm.unadj)
## 
## Call:
## glm(formula = TPAIN ~ ERGO_OPTIM, family = binomial(link = "logit"), 
##     data = TLEVEL)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3678   0.3536   0.3536   0.4173   0.6085  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   2.7408     0.3263   8.401   <2e-16 ***
## ERGO_OPTIM   -1.1482     0.4547  -2.525   0.0116 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 146.29  on 235  degrees of freedom
## Residual deviance: 139.96  on 234  degrees of freedom
## AIC: 143.96
## 
## Number of Fisher Scoring iterations: 5
      contrast(tpain.eo.emmeans.unadj, list("Unadj OR: Ergo Optim =Y on Transient Pain =Y" = c(-1, 1)))
##  contrast                                     odds.ratio    SE  df null z.ratio
##  Unadj OR: Ergo Optim =Y on Transient Pain =Y      0.317 0.144 Inf    1  -2.525
##  p.value
##   0.0116
## 
## Tests are performed on the log odds ratio scale
      contrast(tpain.eo.emmeans.unadj, list("Unadj OR: Ergo Optim =N on Transient Pain =Y" = c(1, -1)))
##  contrast                                     odds.ratio   SE  df null z.ratio
##  Unadj OR: Ergo Optim =N on Transient Pain =Y       3.15 1.43 Inf    1   2.525
##  p.value
##   0.0116
## 
## Tests are performed on the log odds ratio scale
      summary(tpain.eo.glm.adj)
## 
## Call:
## glm(formula = TPAIN ~ ERGO_OPTIM + TRNG_LEVEL, family = binomial(link = "logit"), 
##     data = TLEVEL)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5034   0.2984   0.3197   0.4984   0.8232  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)   
## (Intercept)           2.0230     0.7462   2.711  0.00671 **
## ERGO_OPTIM           -1.1149     0.4649  -2.398  0.01647 * 
## TRNG_LEVELFirst Yr    0.4247     0.7675   0.553  0.58003   
## TRNG_LEVELSecond Yr   1.0661     0.8205   1.299  0.19381   
## TRNG_LEVELThird Yr    0.9250     0.8206   1.127  0.25964   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 146.09  on 234  degrees of freedom
## Residual deviance: 137.16  on 230  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 147.16
## 
## Number of Fisher Scoring iterations: 5
      #Before running OR test, determine whether the multilevel categorical variable TRNG_LEVEL is significant overall?
      aod::wald.test(b = coef(tpain.eo.glm.adj), Sigma = vcov(tpain.eo.glm.adj), Terms = 3:5)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 2.5, df = 3, P(> X2) = 0.48
      contrast(tpain.eo.emmeans.adj, list("Adj OR: Ergo Optim =N on Transient Pain =Y" = c(1, -1)))
##  contrast                                   odds.ratio   SE  df null z.ratio
##  Adj OR: Ergo Optim =N on Transient Pain =Y       3.05 1.42 Inf    1   2.398
##  p.value
##   0.0165
## 
## Results are averaged over the levels of: TRNG_LEVEL 
## Tests are performed on the log odds ratio scale


  • Conclusion: Ergonomic Optimization is a significant effect before and after adjust for Training Level, which itself is not a significant effect. (The Odds Ratio in the adjusted model fell from 3.15 to 3.05, p= 0.0165. In other words, subjects who reported that their Endoscopy Suite environment was not ergonomically optimized were 3.05 times more likely to also report having experienced Transient Pain in one of the several body parts highlghted in the survey.)



  • 2. Does having a Sensitive Teacher with concern for your size/stature influence reports of Transient Pain, with and without adjusting for Training Level?


    tpain.steach.glm.unadj <- glm( TPAIN ~ SENSITIVE_TEACHER, data= TLEVEL, family= binomial(link="logit"))
    tpain.steach.emmeans.unadj <- emmeans( tpain.steach.glm.unadj,  ~ c(SENSITIVE_TEACHER), type= "response")
    tpain.steach.glm.adj   <- glm( TPAIN ~ SENSITIVE_TEACHER + TRNG_LEVEL, data= TLEVEL, family= binomial(link="logit"))
    tpain.steach.emmeans.adj <- emmeans( tpain.steach.glm.adj,  ~ c(SENSITIVE_TEACHER), type= "response")
    
    
    summary(tpain.steach.glm.unadj)
## 
## Call:
## glm(formula = TPAIN ~ SENSITIVE_TEACHER, family = binomial(link = "logit"), 
##     data = TLEVEL)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.3126   0.3780   0.3780   0.5026   0.5026  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)         2.6027     0.3664   7.103 1.22e-12 ***
## SENSITIVE_TEACHER  -0.5974     0.4640  -1.287    0.198    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 145.89  on 233  degrees of freedom
## Residual deviance: 144.18  on 232  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 148.18
## 
## Number of Fisher Scoring iterations: 5
    contrast(tpain.steach.emmeans.unadj, list("Unadj OR: Sensitive Teach =Y on Transient Pain =Y" = c(-1, 1)))
##  contrast                                          odds.ratio    SE  df null
##  Unadj OR: Sensitive Teach =Y on Transient Pain =Y       0.55 0.255 Inf    1
##  z.ratio p.value
##   -1.287  0.1980
## 
## Tests are performed on the log odds ratio scale
    contrast(tpain.steach.emmeans.unadj, list("Unadj OR: Sensitive Teach =N on Transient Pain =Y" = c(1, -1)))
##  contrast                                          odds.ratio    SE  df null
##  Unadj OR: Sensitive Teach =N on Transient Pain =Y       1.82 0.843 Inf    1
##  z.ratio p.value
##    1.287  0.1980
## 
## Tests are performed on the log odds ratio scale
    summary(tpain.steach.glm.adj)
## 
## Call:
## glm(formula = TPAIN ~ SENSITIVE_TEACHER + TRNG_LEVEL, family = binomial(link = "logit"), 
##     data = TLEVEL)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4822   0.3067   0.4080   0.4531   0.8498  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)  
## (Intercept)           1.5247     0.6804   2.241   0.0250 *
## SENSITIVE_TEACHER    -0.6920     0.4792  -1.444   0.1487  
## TRNG_LEVELFirst Yr    0.9197     0.7624   1.206   0.2277  
## TRNG_LEVELSecond Yr   1.5089     0.8129   1.856   0.0634 .
## TRNG_LEVELThird Yr    1.3921     0.8216   1.694   0.0902 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 145.69  on 232  degrees of freedom
## Residual deviance: 140.18  on 228  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 150.18
## 
## Number of Fisher Scoring iterations: 5
    #Before running OR test, determine whether the multilevel categorical variable TRNG_LEVEL is significant overall?
    aod::wald.test(b = coef(tpain.steach.glm.adj), Sigma = vcov(tpain.steach.glm.adj), Terms = 3:5)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 4.1, df = 3, P(> X2) = 0.25
    contrast(tpain.steach.emmeans.adj, list("Adj OR: Sensitive Teach =N on Transient Pain =Y" = c(1, -1)))
##  contrast                                        odds.ratio    SE  df null
##  Adj OR: Sensitive Teach =N on Transient Pain =Y          2 0.957 Inf    1
##  z.ratio p.value
##    1.444  0.1487
## 
## Results are averaged over the levels of: TRNG_LEVEL 
## Tests are performed on the log odds ratio scale


  • Conclusion: Sensitive Teacher is not a significant effect before (p=0.198) or after adjust for Training Level (p=0.149), which itself is not a significant effect. (In the adjusted model, the odds of experiencing Transient Pain if one did not have a Sensitive Teacher was 2.0, but with an insignificant p-value, cl=[0.781, 5.11])



  • 3. Does having a Sensitive Teacher with concern for your size/stature influence the use/encouragement of Ergonomic Optimization, with and without adjusting for Training Level?


    eo.steach.glm.unadj <- glm( ERGO_OPTIM ~ SENSITIVE_TEACHER, data= TLEVEL, family= binomial(link="logit"))
    eo.steach.emmeans.unadj <- emmeans( eo.steach.glm.unadj,  ~ c(SENSITIVE_TEACHER), type= "response")
    eo.steach.glm.adj   <- glm( ERGO_OPTIM ~ SENSITIVE_TEACHER + TRNG_LEVEL, data= TLEVEL, family= binomial(link="logit"))
    eo.steach.emmeans.adj <- emmeans( eo.steach.glm.adj,  ~ c(SENSITIVE_TEACHER), type= "response")
    
    
    summary(eo.steach.glm.unadj)
## 
## Call:
## glm(formula = ERGO_OPTIM ~ SENSITIVE_TEACHER, family = binomial(link = "logit"), 
##     data = TLEVEL)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.0080  -1.0080  -0.6809   1.3569   1.7751  
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## (Intercept)        -1.3437     0.2292  -5.863 4.56e-09 ***
## SENSITIVE_TEACHER   0.9312     0.2965   3.141  0.00168 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 287.23  on 233  degrees of freedom
## Residual deviance: 276.94  on 232  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 280.94
## 
## Number of Fisher Scoring iterations: 4
    contrast(eo.steach.emmeans.unadj, list("Unadj OR: Sensitive Teach =Y on Ergo Optim =Y" = c(-1, 1)))
##  contrast                                      odds.ratio    SE  df null
##  Unadj OR: Sensitive Teach =Y on Ergo Optim =Y       2.54 0.752 Inf    1
##  z.ratio p.value
##    3.141  0.0017
## 
## Tests are performed on the log odds ratio scale
    contrast(eo.steach.emmeans.unadj, list("Unadj OR: Sensitive Teach =N on Ergo Optim =Y" = c(1, -1)))
##  contrast                                      odds.ratio    SE  df null
##  Unadj OR: Sensitive Teach =N on Ergo Optim =Y      0.394 0.117 Inf    1
##  z.ratio p.value
##   -3.141  0.0017
## 
## Tests are performed on the log odds ratio scale
    summary(eo.steach.glm.adj)
## 
## Call:
## glm(formula = ERGO_OPTIM ~ SENSITIVE_TEACHER + TRNG_LEVEL, family = binomial(link = "logit"), 
##     data = TLEVEL)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1833  -0.9466  -0.6308   1.3515   1.8931  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          0.01384    0.55807   0.025 0.980213    
## SENSITIVE_TEACHER    1.03915    0.31117   3.340 0.000839 ***
## TRNG_LEVELFirst Yr  -1.62345    0.62676  -2.590 0.009591 ** 
## TRNG_LEVELSecond Yr -1.52756    0.62137  -2.458 0.013957 *  
## TRNG_LEVELThird Yr  -1.45344    0.63211  -2.299 0.021485 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 284.83  on 232  degrees of freedom
## Residual deviance: 267.99  on 228  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 277.99
## 
## Number of Fisher Scoring iterations: 4
    #Before running OR test, determine whether the multilevel categorical variable TRNG_LEVEL is significant overall?
    aod::wald.test(b = coef(eo.steach.glm.adj), Sigma = vcov(eo.steach.glm.adj), Terms = 3:5)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 7.0, df = 3, P(> X2) = 0.07
    contrast(eo.steach.emmeans.adj, list("Adj OR: Sensitive Teach =Y on Ergo Optim =Y" = c(-1, 1)))
##  contrast                                    odds.ratio   SE  df null z.ratio
##  Adj OR: Sensitive Teach =Y on Ergo Optim =Y       2.83 0.88 Inf    1   3.340
##  p.value
##   0.0008
## 
## Results are averaged over the levels of: TRNG_LEVEL 
## Tests are performed on the log odds ratio scale


  • Conclusion: Sensitive Teacher is a highly significant effect before, and an extremely highly significant effect after adjusting for Training Level. Training Level itself is not a significant effect. (The model summary shows that there is significance among three levels of TRNG_LEVEL *when compared to the reference category, “Advanced.”) Beta for ST increases slightly from +0.9312 to +1.03915, meaning that as Teacher Sensitivity increases, reports of Ergonomic Optimization also rise. (The Odds Ratio in the adjusted model rose from 2.54 to 2.83, p= 0.0008. In other words, subjects who reported that their Endoscopy Suite environment was ergonomically optimized were 2.83 times more likely to have had instructors who were sensitive to their physical size/stature.)

  • When compared to the reference category (“Advanced”), ALL other training levels were significantly different (i.e., lower Beta weights). Does this suggest that teachers are more likely to be sensitive to ergonomics when dealing with older trainees, or does it suggest that younger trainees don’t really know what proper ergonomics should entail and, therefore, don’t know how to rate their instructors on this metric? (Does it take rising to the Advanced level before a trainee fully understands what constitutes “proper ergonomics?”)



  • 4. First Name-No Permission does not appear to be correlated with Sex, but there does appear to be some correlation to Height Band. Is it significant?


  # Since height is a multilevel categorical variable, use the median Male height as the reference, 5, or 5'10-6'
  



FNAME <-
    CORRMATRIX %>% 
    select( SEX, AGECAT, RACE, HEIGHT, TRNG_LEVEL, FNAME_NP) %>% 
    mutate( SEX = factor(SEX),
            RACE = factor(RACE),
            HEIGHT = factor(HEIGHT),
            HEIGHT = relevel(HEIGHT, ref="5"),
            HTCAT = factor(HEIGHT),
            HTCAT = recode_factor(HEIGHT, '1'='0', '2'='0', '3'='0', .default = '1'),
            HTCAT = relevel(HTCAT, ref='1'),
            AGECAT = recode_factor(AGECAT, '1'='1', '2'='1', '3'='2', '4'='2'),
            AGECAT = relevel(AGECAT, ref="2"),
            
            TRNG_LEVEL = factor(TRNG_LEVEL),
            TRNG_LEVEL = relevel(TRNG_LEVEL, ref="4"))

    levels(FNAME$HEIGHT)
## [1] "5" "2" "3" "4" "6" "7"
    levels(FNAME$AGECAT)
## [1] "2" "1"
    fname.height.glm.unadj <- glm( FNAME_NP ~ HEIGHT, data= FNAME, family= binomial(link="logit"))
    fname.height.emmeans.unadj <- emmeans( fname.height.glm.unadj,  ~ c(HEIGHT), type= "response")
    fname.height.glm.adj   <- glm( FNAME_NP ~ HEIGHT + SEX , data= FNAME, family= binomial(link="logit"))
    fname.height.emmeans.adj <- emmeans( fname.height.glm.adj,  ~ c(HEIGHT, SEX), type= "response")
    
    
    summary(fname.height.glm.unadj)
## 
## Call:
## glm(formula = FNAME_NP ~ HEIGHT, family = binomial(link = "logit"), 
##     data = FNAME)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1774  -0.8806  -0.7090   1.3349   2.0184  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.2528     0.3586  -3.494 0.000476 ***
## HEIGHT2       0.8899     0.4843   1.837 0.066151 .  
## HEIGHT3       0.5055     0.4587   1.102 0.270446    
## HEIGHT4       0.1335     0.4599   0.290 0.771534    
## HEIGHT6      -0.6444     0.7155  -0.901 0.367800    
## HEIGHT7       1.2528     1.0623   1.179 0.238300    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 275.21  on 231  degrees of freedom
## Residual deviance: 266.71  on 226  degrees of freedom
##   (4 observations deleted due to missingness)
## AIC: 278.71
## 
## Number of Fisher Scoring iterations: 4
    contrast(fname.height.emmeans.unadj, list("Unadj OR: Height2 (5-5'3) on FNAME_NP =Y relative to 5'10-6'" =   c(-1, 1, 0, 0, 0, 0)))
##  contrast                                                     odds.ratio   SE
##  Unadj OR: Height2 (5-5'3) on FNAME_NP =Y relative to 5'10-6'       2.43 1.18
##   df null z.ratio p.value
##  Inf    1   1.837  0.0662
## 
## Tests are performed on the log odds ratio scale
    contrast(fname.height.emmeans.unadj, list("Unadj OR: Height3 (5'4-5'6) on FNAME_NP =Y relative to 5'10-6'" = c(-1, 0, 1, 0, 0, 0)))
##  contrast                                                       odds.ratio
##  Unadj OR: Height3 (5'4-5'6) on FNAME_NP =Y relative to 5'10-6'       1.66
##     SE  df null z.ratio p.value
##  0.761 Inf    1   1.102  0.2704
## 
## Tests are performed on the log odds ratio scale
    contrast(fname.height.emmeans.unadj, list("Unadj OR: Height4 (5'7-5'9) on FNAME_NP =Y relative to 5'10-6'" = c(-1, 0, 0, 1, 0, 0)))
##  contrast                                                       odds.ratio
##  Unadj OR: Height4 (5'7-5'9) on FNAME_NP =Y relative to 5'10-6'       1.14
##     SE  df null z.ratio p.value
##  0.526 Inf    1   0.290  0.7715
## 
## Tests are performed on the log odds ratio scale
    contrast(fname.height.emmeans.unadj, list("Unadj OR: Height6 (6'-6'4) on FNAME_NP =Y relative to 5'10-6'" = c(-1, 0, 0, 0, 1, 0)))
##  contrast                                                      odds.ratio    SE
##  Unadj OR: Height6 (6'-6'4) on FNAME_NP =Y relative to 5'10-6'      0.525 0.376
##   df null z.ratio p.value
##  Inf    1  -0.901  0.3678
## 
## Tests are performed on the log odds ratio scale
    contrast(fname.height.emmeans.unadj, list("Unadj OR: Height7 (< 6'4) on FNAME_NP =Y relative to 5'10-6'" = c(-1, 0, 0, 0, 0, 1)))
##  contrast                                                     odds.ratio   SE
##  Unadj OR: Height7 (< 6'4) on FNAME_NP =Y relative to 5'10-6'        3.5 3.72
##   df null z.ratio p.value
##  Inf    1   1.179  0.2383
## 
## Tests are performed on the log odds ratio scale
    #Determine whether the multilevel categorical variable HEIGHT is significant overall in the UNADJUSTED model?
    aod::wald.test(b = coef(fname.height.glm.unadj), Sigma = vcov(fname.height.glm.unadj), Terms = 2:6)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 8.0, df = 5, P(> X2) = 0.16
    summary(fname.height.glm.adj)
## 
## Call:
## glm(formula = FNAME_NP ~ HEIGHT + SEX, family = binomial(link = "logit"), 
##     data = FNAME)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.1905  -0.8766  -0.7094   1.3363   2.0184  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.25141    0.35869  -3.489 0.000485 ***
## HEIGHT2      0.94709    0.62448   1.517 0.129369    
## HEIGHT3      0.55483    0.57039   0.973 0.330686    
## HEIGHT4      0.15571    0.48414   0.322 0.747744    
## HEIGHT6     -0.64571    0.71553  -0.902 0.366838    
## HEIGHT7      1.28229    1.08178   1.185 0.235877    
## SEX1        -0.06176    0.42540  -0.145 0.884565    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 275.21  on 231  degrees of freedom
## Residual deviance: 266.69  on 225  degrees of freedom
##   (4 observations deleted due to missingness)
## AIC: 280.69
## 
## Number of Fisher Scoring iterations: 4
    #summary(fname.height.emmeans.adj)
    
    
    #Determine whether the multilevel categorical variable HEIGHT is significant overall in the ADJUSTED model?
    aod::wald.test(b = coef(fname.height.glm.adj), Sigma = vcov(fname.height.glm.adj), Terms = 2:6)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 5.3, df = 5, P(> X2) = 0.38
    # Just for fun, adjust only for SEX
     
    fname.sex.glm.unadj <- glm( FNAME_NP ~ SEX , data= FNAME, family= binomial(link="logit"))
    fname.sex.emmeans.unadj <- emmeans( fname.sex.glm.unadj,  ~ c(SEX), type= "response")
    
    
    summary(fname.sex.glm.unadj)
## 
## Call:
## glm(formula = FNAME_NP ~ SEX, family = binomial(link = "logit"), 
##     data = FNAME)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.8956  -0.8956  -0.7255   1.4883   1.7109  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -1.2004     0.2156  -5.569 2.57e-08 ***
## SEX1          0.4938     0.2947   1.676   0.0938 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 275.86  on 232  degrees of freedom
## Residual deviance: 273.03  on 231  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 277.03
## 
## Number of Fisher Scoring iterations: 4
    #Confirm that Sex is not significant (trend with  0.10 > p-value > 0.05)
    aod::wald.test(b = coef(fname.sex.glm.unadj), Sigma = vcov(fname.sex.glm.unadj), Terms = 2:2)
## Wald test:
## ----------
## 
## Chi-squared test:
## X2 = 2.8, df = 1, P(> X2) = 0.094
    contrast(fname.sex.emmeans.unadj, list("Unadj OR: Females vs. Males FNAME_NP =Y" =   c(-1, 1)))
##  contrast                                odds.ratio    SE  df null z.ratio
##  Unadj OR: Females vs. Males FNAME_NP =Y       1.64 0.483 Inf    1   1.676
##  p.value
##   0.0938
## 
## Tests are performed on the log odds ratio scale
    #Use sjPlots to display resutls for all three models side-by-side: Height Only,  Height + Sex, Sex Only
    
    tab_model(fname.height.glm.unadj, fname.height.glm.adj, fname.sex.glm.unadj, title= "First Name - No Permission Logistic Models with Height Bands as Given<br>.",
              dv.labels= c('Height Cat Unadj','Height Cat Adj Sex', 'Sex Unadj'))
First Name - No Permission Logistic Models with Height Bands as Given
.
  Height Cat Unadj Height Cat Adj Sex Sex Unadj
Predictors Odds Ratios CI p Odds Ratios CI p Odds Ratios CI p
(Intercept) 0.29 0.13 – 0.56 <0.001 0.29 0.13 – 0.56 <0.001 0.30 0.19 – 0.45 <0.001
HEIGHT [2] 2.43 0.95 – 6.46 0.066 2.58 0.76 – 8.93 0.129
HEIGHT [3] 1.66 0.68 – 4.19 0.270 1.74 0.57 – 5.41 0.331
HEIGHT [4] 1.14 0.47 – 2.89 0.772 1.17 0.45 – 3.08 0.748
HEIGHT [6] 0.53 0.11 – 1.96 0.368 0.52 0.11 – 1.95 0.367
HEIGHT [7] 3.50 0.38 – 32.36 0.238 3.60 0.38 – 34.46 0.236
SEX [1] 0.94 0.41 – 2.18 0.885 1.64 0.92 – 2.94 0.094
Observations 232 232 233
R2 Tjur 0.036 0.036 0.012
    #Run additional on the collapsed Height Cat variable and see what happens
    
        
    fname.htcat.glm.unadj <- glm( FNAME_NP ~ HTCAT   , data= FNAME, family= binomial(link="logit"))
    fname.htcat.emmeans.unadj <- emmeans( fname.htcat.glm.unadj,  ~ c(HTCAT), type= "response")
    
    pairs(fname.htcat.emmeans.unadj)
##  contrast        odds.ratio    SE  df null z.ratio p.value
##  HTCAT1 / HTCAT0      0.525 0.155 Inf    1  -2.180  0.0292
## 
## Tests are performed on the log odds ratio scale
    contrast(fname.htcat.emmeans.unadj, list("Unadj OR: Height < 5'7 on FNAME_NP =Y relative to 5'7+ " =   c(-1, 1)))
##  contrast                                                odds.ratio    SE  df
##  Unadj OR: Height < 5'7 on FNAME_NP =Y relative to 5'7+        1.91 0.564 Inf
##  null z.ratio p.value
##     1   2.180  0.0292
## 
## Tests are performed on the log odds ratio scale
    fname.htcat.glm.adj <- glm( FNAME_NP ~ HTCAT+ TRNG_LEVEL , data= FNAME, family= binomial(link="logit"))
    fname.htcat.emmeans.adj <- emmeans( fname.htcat.glm.adj,  ~ c(HTCAT), type= "response")
    
    fname.htcat.glm.adj2 <- glm( FNAME_NP ~ HTCAT+ RACE , data= FNAME, family= binomial(link="logit"))
    fname.htcat.emmeans.adj2 <- emmeans
    
    fname.htcat.glm.adj3 <- glm( FNAME_NP ~ HTCAT+ SEX , data= FNAME, family= binomial(link="logit"))
    fname.htcat.emmeans.adj3 <- emmeans( fname.htcat.glm.adj,  ~ c(HTCAT), type= "response")

    tab_model(fname.htcat.glm.unadj, fname.htcat.glm.adj, fname.htcat.glm.adj2,
              title= "First Name - No Permission Logistic Models with Height Bands Collapsed (2) <br>.",
              dv.labels= c('Height Cat Unadj','Height Cat Adj Training Level', 'Height Cat Adj Race'))
First Name - No Permission Logistic Models with Height Bands Collapsed (2)
.
  Height Cat Unadj Height Cat Adj Training Level Height Cat Adj Race
Predictors Odds Ratios CI p Odds Ratios CI p Odds Ratios CI p
(Intercept) 0.29 0.19 – 0.43 <0.001 0.21 0.05 – 0.70 0.021 0.29 0.18 – 0.47 <0.001
HTCAT [0] 1.91 1.07 – 3.42 0.029 1.88 1.04 – 3.42 0.036 1.91 1.06 – 3.46 0.032
TRNG LEVEL [1] 1.49 0.41 – 7.11 0.569
TRNG LEVEL [2] 1.54 0.43 – 7.36 0.537
TRNG LEVEL [3] 1.09 0.29 – 5.35 0.907
RACE [1] 0.99 0.55 – 1.81 0.984
Observations 232 231 232
R2 Tjur 0.021 0.027 0.021


  • Conclusion: When compared to the median Male height (HEIGHT=5, or 5’10-6’), the odds of the subjects’ first name being used without permission decreases as the reference height is approached. Subjects who were taller, however, had mixed results: Ones who were one band above the reference height (6’-6’4), had reduced odds of being disrespected in this manner; but the tallest subjects (HEIGHT=7, or > 6’4) had the worst odds ratio of all, 3.5, a group that is singularly comprised of males (n=2). Unfortunately, none of these results was statistically significant.

  • Even after adjusting for SEX, the significance of HEIGHT in the model did not improve (in fact, it worsened, likely because the variables HEIGHT and SEX are highly correlated). An unadjusetd model where SEX replaced HEIGHT as the predictor variable, yielded good results, but still not significant (p= 0.0938).

  • If the Height Category is collapsed into two groups – shorter than 5’7” and those who are 5’7” and above, there does appear to be some significance – even if controlled separately for Training Level and Race. Only controlling for Sex knocks this predictor out of sigificance, likely because the HEIGHT and SEX are so highly correlated.





PLAYGROUND (FOR TESTING IDEAS)


Question 1: Is it useful to know a “Pain Score”?

PAIN_SCORE variable is created by turing six questions into numeric variables with binary values:
  • Sustained endoscopy-related injury (Yes = 1, No/NA = 0)
  • Transient pain experienced in hand (Yes = 1, No/NA = 0)
  • Transient pain experience in neck/shoulders (Yes = 1, No/NA = 0)
  • Transient pain experienced in back (Yes = 1, No/NA = 0)
  • Transient pain experienced in legs (Yes = 1, No/NA = 0)
  • Transient pain experienced in feet (Yes = 1, No/NA = 0)


PAIN <-

SURVEY %>% 
    mutate( BIRTHSEX2 = case_when( BIRTHSEX == "M" ~ 0,
                                      TRUE ~ 1 ),
      
            PAIN_INJ = case_when (EVER_INJURED == 'Y' ~ 1,
                                  TRUE ~ 0 ),
            
            PAIN_HAND = case_when (EXPERIENCED_TRANSIENT_PAIN_HAND == "Y" ~ 1,
                                   TRUE ~ 0),

            PAIN_NECK = case_when (EXPERIENCED_TRANSIENT_PAIN_NECK_SHOULDER == "Y" ~ 1,
                                   TRUE ~ 0),

            PAIN_BACK = case_when (EXPERIENCED_TRANSIENT_PAIN_BACK == "Y" ~ 1,
                                   TRUE ~ 0),

            PAIN_LEG = case_when (EXPERIENCED_TRANSIENT_PAIN_LEG == "Y" ~ 1,
                                   TRUE ~ 0),

            PAIN_FOOT = case_when (EXPERIENCED_TRANSIENT_PAIN_FOOT == "Y" ~ 1,
                                   TRUE ~ 0)) %>% 
            rowwise() %>% 
            mutate(PAIN_SCORE = sum(PAIN_INJ+ PAIN_HAND+ PAIN_NECK+ PAIN_BACK + PAIN_LEG+ PAIN_FOOT)) %>% 
  
    select( RECORD_ID, BIRTHSEX, BIRTHSEX2, RACE, RACE2, TRAINING_LEVEL, EVER_INJURED, starts_with("PAIN_") )



# Test for normality of distribution (will be sensitive to outliers)
shapiro.test(PAIN$PAIN_SCORE)
## 
##  Shapiro-Wilk normality test
## 
## data:  PAIN$PAIN_SCORE
## W = 0.89272, p-value = 6.5e-12
# Group Means & Standard Deviations
PAIN %>% 
  group_by(BIRTHSEX) %>% 
  summarize( PAINMEAN = mean(PAIN_SCORE),
             PAINSD = sd(PAIN_SCORE))
## # A tibble: 2 × 3
##   BIRTHSEX PAINMEAN PAINSD
##   <fct>       <dbl>  <dbl>
## 1 F            2.27   1.39
## 2 M            1.76   1.33
# Welch's T-test

eov.ttest(PAIN, PAIN_SCORE, BIRTHSEX)
## [1] "F Test p.value = 0.6449448  EOV = TRUE (Pooled)"
## [1] "PAIN : PAIN_SCORE ~ BIRTHSEX"
## 
##  Two Sample t-test
## 
## data:  PAIN : PAIN_SCORE ~ BIRTHSEX
## t = 2.8305, df = 234, p-value = 0.005051
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  0.1523617 0.8501565
## sample estimates:
## mean in group F mean in group M 
##        2.265487        1.764228
# Violin Plot to explore differences between sexes
ggbetweenstats( data= PAIN,
                x = BIRTHSEX,
                y = PAIN_SCORE,
                type="parametric",
                p.adjust.method = "none",
                title = "Mean Pain/Injury Scores by Birth Sex")

PAIN %>%
ggplot(aes( x= PAIN_SCORE, y= stat(density), fill= BIRTHSEX))+
geom_density( alpha=0.5, position = "identity" ) +
scale_x_continuous(breaks = scales::breaks_width(1.0) ) +
scale_y_continuous(breaks = scales::breaks_width(.1))+
scale_fill_manual(values = c("red","darkgreen"), name ="Birth Sex", labels = c("Female","Male"))+
xlab("Pain Score")+ ylab("Density")+
ggtitle("Pain Score Density Curve by Sex") +
theme_538()

corr.test(  PAIN$BIRTHSEX2,PAIN$PAIN_SCORE, method= 'pearson')
## Call:corr.test(x = PAIN$BIRTHSEX2, y = PAIN$PAIN_SCORE, method = "pearson")
## Correlation matrix 
## [1] 0.18
## Sample Size 
## [1] 236
## These are the unadjusted probability values.
##   The probability values  adjusted for multiple tests are in the p.adj object. 
## [1] 0.01
## 
##  To see confidence intervals of the correlations, print with the short=FALSE option


Question 2: Is it useful to know a “Training Score”?

TRNG_SCORE variable is created by turing the questions into numeric variables with di- or polychotomous values:
  • Formal Ergo Training (Yes=1, No=0)
  • Informal Ergo Training, (Yes=1, No=0)
  • Posture Awareness (Yes=1, No=0)
  • Bed Height Adjustment (Yes=1, No=0)
  • Bed Angle Position (Yes=1, No=0)
  • Monitor Height Adjustment (Yes=1, No=0)
  • Special Maneuvers/Injury Avoidance (Yes=1, No=0)
  • Strength-Building Exercises (Yes=1, No=0)
  • Use of Dial Extenders (Yes=1, default=0)
  • Use of Pedi Colonoscopes (Yes=1, No=0)
  • Ergo Feedback from Teachers (Often=1, Sometimes=1, Rarely=0, Never=0)
  • Equipment optimizatIon (Yes=1, default=0)
  • Glove Size Available (Yes=1, default=0)
  • Dial Extenders Available (Yes=1, default=0)
  • Encouraged to Use Dial Ext (Yes=1, default=0)
  • Pediatric Colonoscopes Available (Yes=1, default=0)
  • Lightweight 1P Aprons Available (Yes=1, default=0)
  • Lightweight 2P Aprons Available (Yes=1, default=0)
  • Well-fitted Aprons Available (Yes=1, default=0)
  • Formal Ergo Time-Out (Yes=1, No=0)
  • Informal Ergo Time-Out (Yes=1, No=0)
  • Monitors Easily Adjustable (Yes=1, No=0)
  • Teachers Sensitivity During Training (Yes=1, No=0)
  • Received Tactile Instruction from Males (Often=1, default=0)
  • Received Tactile Instruction from Females (Often=1, default=0)


# CREATE TRAINING INDEX 

TRAINING <-
  
  SURVEY %>% 
  mutate( BIRTHSEX2 = case_when( is.na(BIRTHSEX) ~ NA_real_,
                                 BIRTHSEX == "M" ~ 0,
                                 TRUE ~ 1 ),
          
         # AGE2 = factor(AGE2, levels=rev(levels(AGE2))),
          
          AGENUM  = case_when (AGE2 == "< 30" ~ 1,
                               AGE2 == "30-34" ~2,
                               AGE2 == "35-40" ~3,
                               TRUE ~ 4),
         
         
          TRNG_FORMAL = case_when (is.na(FELLOWSHIP_FORMAL_ERGO_TRAINING) ~ NA_real_,
                                   FELLOWSHIP_FORMAL_ERGO_TRAINING == 'Y' ~ 1,
                                TRUE ~ 0 ),
          
          TRNG_INFORMAL = case_when (is.na(INFORMAL_TRAINING) ~ NA_real_,
                                     INFORMAL_TRAINING == "Y" ~ 1,
                                 TRUE ~ 0),
          
          TRNG_POSTURAL = case_when (is.na(TRAINING_TECHNIQUES_POSTURAL) ~ NA_real_,
                                    TRAINING_TECHNIQUES_POSTURAL == "Y" ~ 1,
                                 TRUE ~ 0),
          
          TRNG_BEDHT = case_when (is.na(TRAINING_TECHNIQUES_BEDHEIGHT) ~ NA_real_,
                                  TRAINING_TECHNIQUES_BEDHEIGHT == "Y" ~ 1,
                                 TRUE ~ 0),
          
          TRNG_BEDANG = case_when (is.na(TRAINING_TECHNIQUES_BEDANGLE) ~ NA_real_,
                                   TRAINING_TECHNIQUES_BEDANGLE == "Y" ~ 1,
                                TRUE ~ 0),
          
          TRNG_MONITOR = case_when (is.na(TRAINING_TECHNIQUES_MONITORHEIGHT) ~ NA_real_,
                                    TRAINING_TECHNIQUES_MONITORHEIGHT == "Y" ~ 1,
                                 TRUE ~ 0),
          
          TRNG_MANEUVERS = case_when (is.na(TRAINING_TECHNIQUES_MUSCULOSKELETAL) ~ NA_real_,
                                      TRAINING_TECHNIQUES_MUSCULOSKELETAL == "Y" ~ 1,
                                    TRUE ~ 0) ,

          TRNG_EXERCISE = case_when (is.na(TRAINING_TECHNIQUES_EXERCISE_STRETCHING) ~ NA_real_,
                                     TRAINING_TECHNIQUES_EXERCISE_STRETCHING == "Y" ~ 1,
                                    TRUE ~ 0),
          
          TRNG_DIALEXT= case_when (is.na(TRAINING_TECHNIQUES_DIAL_EXTENDERS) ~ NA_real_,
                                   TRAINING_TECHNIQUES_DIAL_EXTENDERS == "Y" ~ 1,
                                   TRUE ~ 0),
          
          TRNG_PEDISCOPE= case_when (is.na(TRAINING_TECHNIQUES_PEDIATRIC_COLONOSCOPE) ~ NA_real_,
                                     TRAINING_TECHNIQUES_PEDIATRIC_COLONOSCOPE == "Y" ~ 1,
                                   TRUE ~ 0),
          
          TRNG_FEEDBACK= case_when (is.na(ERGO_FEEDBACK) ~ NA_real_,
                                    ERGO_FEEDBACK == "Often" ~ 1,
                                    ERGO_FEEDBACK == "Sometimes" ~ 1,
                                    TRUE ~ 0),
          
          TRNG_OPTIM= case_when (is.na(ERGO_OPTIMIZATION) ~ NA_real_,
                                 ERGO_OPTIMIZATION == "Y" ~ 1,
                                 ERGO_OPTIMIZATION == "N" ~ 0,
                                TRUE ~ 0),
          
          TRNG_GLOVE_AVAIL = case_when (is.na(GLOVE_SIZE_AVAILABLE) ~ NA_real_,
                                        GLOVE_SIZE_AVAILABLE == "Y" ~ 1,
                                 TRUE ~ 0),
          
          TRNG_DIALEXT_AVAIL= case_when (is.na(DIAL_EXTENDERS_AVAILABLE) ~ NA_real_,
                                         DIAL_EXTENDERS_AVAILABLE == "Y" ~ 1,
                                        TRUE ~ 0),
          
          TRNG_DIALEXT_ENC= case_when (is.na(DIAL_EXTENDERS_ENCOURAGED) ~ NA_real_,
                                       DIAL_EXTENDERS_ENCOURAGED == "Y" ~ 1,
                                                                    TRUE ~ 0),
          
          TRNG_PEDISCOPE_AVAIL = case_when (is.na(PEDI_COLONOSCOPES_AVAILABLE) ~ NA_real_,
                                            PEDI_COLONOSCOPES_AVAILABLE == "Y" ~ 1,
                                        TRUE ~ 0),
          
          TRNG_APRON_LW1P = case_when (is.na(LEAD_APRONS_LW_ONEPIECE) ~ NA_real_,
                                               LEAD_APRONS_LW_ONEPIECE == "Y" ~ 1,
                                            TRUE ~ 0),
          
          TRNG_APRON_LW2P = case_when (is.na(LEAD_APRONS_LW_TWOPIECE) ~ NA_real_,
                                       LEAD_APRONS_LW_TWOPIECE == "Y" ~ 1,
                                       TRUE ~ 0),

          TRNG_TO_FORMAL= case_when (is.na(ERGO_FORMAL_TIMEOUT_PRIOR) ~ NA_real_,
                                     ERGO_FORMAL_TIMEOUT_PRIOR == "Y" ~ 1,
                                            TRUE ~ 0),
          
          TRNG_TO_INFORMAL= case_when (is.na(ERGO_INFORMAL_TIMEOUT_PRIOR) ~ NA_real_,
                                       ERGO_INFORMAL_TIMEOUT_PRIOR == "Y" ~ 1,
                                     TRUE ~ 0),

          TRNG_MONITORS_EASYADJ = case_when (is.na(MONITORS_ADJUSTABLE) ~ NA_real_,
                                             MONITORS_ADJUSTABLE == "Y" ~ 1,
                                     TRUE ~ 0),
          
          TRNG_TRAINERS_SENSITIVITY = case_when (is.na(TEACHER_SENSITIVITY_STATURE_HANDSIZE) ~ NA_real_,
                                                 TEACHER_SENSITIVITY_STATURE_HANDSIZE == "Y" ~ 1,
                                                  TRUE ~ 0),
          
          
          TRNG_TACTILE_MALES = case_when (is.na(TACTILE_INSTRUCTION_FROM_MALES) ~ NA_real_,
                                          TACTILE_INSTRUCTION_FROM_MALES == "Often" ~ 1,
                                                 TRUE ~ 0),
          
          TRNG_TACTILE_FEMALES = case_when (is.na(TACTILE_INSTRUCTION_FROM_FEMALES) ~ NA_real_,
                                            TACTILE_INSTRUCTION_FROM_FEMALES == "Often" ~ 1,
                                          TRUE ~ 0))  %>% 
  
  rowwise() %>% 
  mutate(TRNG_SCORE = sum(TRNG_FORMAL+ TRNG_INFORMAL+ TRNG_POSTURAL+ TRNG_BEDHT+ +TRNG_BEDANG+ TRNG_MONITOR+ TRNG_MANEUVERS+ TRNG_EXERCISE+ TRNG_DIALEXT+
                          TRNG_PEDISCOPE+ TRNG_FEEDBACK+ TRNG_OPTIM+ TRNG_GLOVE_AVAIL+ TRNG_DIALEXT_AVAIL+ TRNG_DIALEXT_ENC+ TRNG_PEDISCOPE_AVAIL+
                            TRNG_APRON_LW1P+ TRNG_APRON_LW2P+ TRNG_TO_FORMAL+ TRNG_TO_INFORMAL+ TRNG_MONITORS_EASYADJ + TRNG_TRAINERS_SENSITIVITY+
                          TRNG_TACTILE_MALES+ TRNG_TACTILE_FEMALES),
                          
          TRNG_SCORE2 = sum(TRNG_FORMAL+ TRNG_INFORMAL+ TRNG_POSTURAL+ TRNG_BEDHT+ +TRNG_BEDANG+ TRNG_MONITOR+ TRNG_MANEUVERS+ TRNG_EXERCISE+ TRNG_DIALEXT+
                              TRNG_PEDISCOPE+ TRNG_FEEDBACK+ TRNG_TO_FORMAL+ TRNG_TO_INFORMAL+  TRNG_TRAINERS_SENSITIVITY+
                              TRNG_TACTILE_MALES+ TRNG_TACTILE_FEMALES) ,
         
          EQUIP_SCORE = sum(TRNG_OPTIM+ TRNG_GLOVE_AVAIL+ TRNG_DIALEXT_AVAIL+ TRNG_DIALEXT_ENC+ TRNG_PEDISCOPE_AVAIL+
                              TRNG_APRON_LW1P+ TRNG_APRON_LW2P + TRNG_TRAINERS_SENSITIVITY+ TRNG_MONITORS_EASYADJ
                              )) %>% 
  
  select( RECORD_ID, BIRTHSEX, BIRTHSEX2, AGE2, AGENUM, RACE, RACE2, TRAINING_LEVEL, starts_with("TRNG_"), starts_with("EQUIP_") )







TRAINING %>% 
  group_by(BIRTHSEX) %>% 
  summarize( TRNGMEAN = mean(TRNG_SCORE, na.rm=T),
             TRNGSD = sd(TRNG_SCORE, na.rm=T))
## # A tibble: 2 × 3
##   BIRTHSEX TRNGMEAN TRNGSD
##   <fct>       <dbl>  <dbl>
## 1 F            11.9   3.71
## 2 M            12.3   3.36
shapiro.test(TRAINING$TRNG_SCORE)
## 
##  Shapiro-Wilk normality test
## 
## data:  TRAINING$TRNG_SCORE
## W = 0.9831, p-value = 0.007635
eov.ttest(TRAINING, TRNG_SCORE, BIRTHSEX)
## [1] "F Test p.value = 0.29308  EOV = TRUE (Pooled)"
## [1] "TRAINING : TRNG_SCORE ~ BIRTHSEX"
## 
##  Two Sample t-test
## 
## data:  TRAINING : TRNG_SCORE ~ BIRTHSEX
## t = -0.79713, df = 228, p-value = 0.4262
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  -1.2890501  0.5464879
## sample estimates:
## mean in group F mean in group M 
##        11.90741        12.27869
ggbetweenstats( data= TRAINING,
                x = BIRTHSEX,
                y = TRNG_SCORE,
                type="parametric",
                p.adjust.method = "none",
                title = "Mean Training Scores by Birth Sex")

corr.test(  TRAINING$BIRTHSEX2,TRAINING$TRNG_SCORE, method= 'pearson', ci=T)
## Call:corr.test(x = TRAINING$BIRTHSEX2, y = TRAINING$TRNG_SCORE, method = "pearson", 
##     ci = T)
## Correlation matrix 
## [1] -0.05
## Sample Size 
## [1] 230
## These are the unadjusted probability values.
##   The probability values  adjusted for multiple tests are in the p.adj object. 
## [1] 0.43
## 
##  To see confidence intervals of the correlations, print with the short=FALSE option


Simple Regressions to Predict Pain Score (Likelihood to Sustain Pain/Injury)


# BIRTHSEX predicting PAIN_SCORE

summary(lm(PAIN_SCORE ~ BIRTHSEX , data = SCORES))
## 
## Call:
## lm(formula = PAIN_SCORE ~ BIRTHSEX, data = SCORES)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.2655 -0.7642 -0.2655  0.7345  4.2358 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.2655     0.1278  17.720  < 2e-16 ***
## BIRTHSEXM    -0.5013     0.1771  -2.831  0.00505 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.359 on 234 degrees of freedom
## Multiple R-squared:  0.0331, Adjusted R-squared:  0.02897 
## F-statistic: 8.012 on 1 and 234 DF,  p-value: 0.005051
# BIRTHSEX predicting PAIN_SCORE when controlled for EQUIP_SCORE

summary(lm(PAIN_SCORE ~  BIRTHSEX + EQUIP_SCORE , data = SCORES))
## 
## Call:
## lm(formula = PAIN_SCORE ~ BIRTHSEX + EQUIP_SCORE, data = SCORES)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.4855 -0.9155 -0.3722  0.6232  4.0845 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.70281    0.25705  10.515   <2e-16 ***
## BIRTHSEXM   -0.46136    0.18222  -2.532   0.0120 *  
## EQUIP_SCORE -0.10866    0.05857  -1.855   0.0649 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.358 on 229 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.05059,    Adjusted R-squared:  0.0423 
## F-statistic: 6.102 on 2 and 229 DF,  p-value: 0.00262
# BIRTHSEX predicting PAIN_SCORE when controlled for EQUIP_SCORE & AGEBAND


summary(lm(PAIN_SCORE ~  BIRTHSEX + EQUIP_SCORE + AGE2 , data = SCORES))
## 
## Call:
## lm(formula = PAIN_SCORE ~ BIRTHSEX + EQUIP_SCORE + AGE2, data = SCORES)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.5805 -0.9306 -0.2907  0.6265  4.1546 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.16251    0.39371   5.493 1.06e-07 ***
## BIRTHSEXM   -0.41806    0.18322  -2.282   0.0234 *  
## EQUIP_SCORE -0.11591    0.05824  -1.990   0.0478 *  
## AGE230-34    0.64978    0.34264   1.896   0.0592 .  
## AGE235-40    0.33279    0.40318   0.825   0.4100    
## AGE2> 40    -1.28081    1.39037  -0.921   0.3579    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.348 on 226 degrees of freedom
##   (4 observations deleted due to missingness)
## Multiple R-squared:  0.07705,    Adjusted R-squared:  0.05663 
## F-statistic: 3.774 on 5 and 226 DF,  p-value: 0.00266
anova(lm(PAIN_SCORE ~ BIRTHSEX + EQUIP_SCORE + AGE2 , data = SCORES))
## Analysis of Variance Table
## 
## Response: PAIN_SCORE
##              Df Sum Sq Mean Sq F value   Pr(>F)   
## BIRTHSEX      1  16.16 16.1637  8.8951 0.003173 **
## EQUIP_SCORE   1   6.35  6.3481  3.4934 0.062907 . 
## AGE2          3  11.77  3.9245  2.1597 0.093607 . 
## Residuals   226 410.68  1.8172                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# COMPARE how M/F PAIN_SCORE means differ in unadjusted and fully adjusted model


library(emmeans)

#unadjusted model
pscore.unadj <- emmeans(lm(PAIN_SCORE ~ BIRTHSEX , data = SCORES), specs=  "BIRTHSEX")
summary(pscore.unadj)
##  BIRTHSEX emmean    SE  df lower.CL upper.CL
##  F          2.27 0.128 234     2.01     2.52
##  M          1.76 0.123 234     1.52     2.01
## 
## Confidence level used: 0.95
contrast(pscore.unadj, list(FvM = c(1,-1)))
##  contrast estimate    SE  df t.ratio p.value
##  FvM         0.501 0.177 234   2.831  0.0051
#fully adjusted model
pscore.adj <- emmeans(lm(PAIN_SCORE ~ BIRTHSEX + EQUIP_SCORE + AGE2 , data = SCORES), specs=  "BIRTHSEX")
summary(pscore.adj)
##  BIRTHSEX emmean    SE  df lower.CL upper.CL
##  F          1.61 0.373 226    0.875     2.35
##  M          1.19 0.358 226    0.488     1.90
## 
## Results are averaged over the levels of: AGE2 
## Confidence level used: 0.95
contrast(pscore.adj, list(FvM = c(1,-1)))
##  contrast estimate    SE  df t.ratio p.value
##  FvM         0.418 0.183 226   2.282  0.0234
## 
## Results are averaged over the levels of: AGE2
#Compare all combinations of BIRTHSEX-AGE2 to see if any sliver is significantly different (after Benjamini-Hochberg correction for multiple comparisons)

pscore.adj2 <- emmeans(lm(PAIN_SCORE ~ BIRTHSEX + EQUIP_SCORE + AGE2 , data = SCORES), specs=  c("BIRTHSEX","AGE2"))
summary(pscore.adj2)
##  BIRTHSEX AGE2   emmean    SE  df lower.CL upper.CL
##  F        < 30   1.6854 0.340 226    1.016     2.35
##  M        < 30   1.2673 0.341 226    0.596     1.94
##  F        30-34  2.3352 0.135 226    2.069     2.60
##  M        30-34  1.9171 0.137 226    1.648     2.19
##  F        35-40  2.0182 0.264 226    1.498     2.54
##  M        35-40  1.6001 0.238 226    1.131     2.07
##  F        > 40   0.4046 1.361 226   -2.277     3.09
##  M        > 40  -0.0135 1.348 226   -2.670     2.64
## 
## Confidence level used: 0.95
pairs(pscore.adj2, adjust="bh", type="response")
##  contrast              estimate    SE  df t.ratio p.value
##  F < 30 - M < 30         0.4181 0.183 226   2.282  0.1093
##  F < 30 - (F 30-34)     -0.6498 0.343 226  -1.896  0.2071
##  F < 30 - (M 30-34)     -0.2317 0.388 226  -0.597  0.5938
##  F < 30 - (F 35-40)     -0.3328 0.403 226  -0.825  0.4783
##  F < 30 - (M 35-40)      0.0853 0.427 226   0.200  0.8419
##  F < 30 - F > 40         1.2808 1.390 226   0.921  0.4772
##  F < 30 - M > 40         1.6989 1.390 226   1.222  0.3730
##  M < 30 - (F 30-34)     -1.0678 0.389 226  -2.747  0.1093
##  M < 30 - (M 30-34)     -0.6498 0.343 226  -1.896  0.2071
##  M < 30 - (F 35-40)     -0.7509 0.458 226  -1.639  0.2872
##  M < 30 - (M 35-40)     -0.3328 0.403 226  -0.825  0.4783
##  M < 30 - F > 40         0.8627 1.415 226   0.610  0.5938
##  M < 30 - M > 40         1.2808 1.390 226   0.921  0.4772
##  (F 30-34) - (M 30-34)   0.4181 0.183 226   2.282  0.1093
##  (F 30-34) - (F 35-40)   0.3170 0.255 226   1.243  0.3730
##  (F 30-34) - (M 35-40)   0.7351 0.291 226   2.522  0.1093
##  (F 30-34) - F > 40      1.9306 1.355 226   1.425  0.3352
##  (F 30-34) - M > 40      2.3487 1.355 226   1.734  0.2624
##  (M 30-34) - (F 35-40)  -0.1011 0.335 226  -0.302  0.7913
##  (M 30-34) - (M 35-40)   0.3170 0.255 226   1.243  0.3730
##  (M 30-34) - F > 40      1.5125 1.380 226   1.096  0.4041
##  (M 30-34) - M > 40      1.9306 1.355 226   1.425  0.3352
##  (F 35-40) - (M 35-40)   0.4181 0.183 226   2.282  0.1093
##  (F 35-40) - F > 40      1.6136 1.369 226   1.179  0.3730
##  (F 35-40) - M > 40      2.0317 1.374 226   1.479  0.3352
##  (M 35-40) - F > 40      1.1955 1.389 226   0.861  0.4783
##  (M 35-40) - M > 40      1.6136 1.369 226   1.179  0.3730
##  F > 40 - M > 40         0.4181 0.183 226   2.282  0.1093
## 
## P value adjustment: BH method for 28 tests
#FINDING:  
#In the fully-adjusted model, of the 28 possible SEX/AGEBAND combinations, the one that is statistically significant - and survives a correction for multiple comparisons - is "Males < 30" versus "Females 30-34." Here the Females' mean is 1.327 points higher than the Males', p=0.0292.  (The F-M differential for entire adjusted model was only 0.388 points.)


#Since Females 30-34 have the highest adjusted Pain Score, how does that score compare to the mean Pain Score of ALL OTHER RESPONDENTS?


# Mean for Females 30-34
contrast( pscore.adj2, list(F3034 = c(0,0,1,0,0,0,0,0)))
##  contrast estimate    SE  df t.ratio p.value
##  F3034        2.34 0.135 226  17.267  <.0001
#Mean for Everyone Else
contrast( pscore.adj2, list(OTHERS = c(1/7, 1/7, 0, 1/7, 1/7, 1/7, 1/7, 1/7)))
##  contrast estimate    SE  df t.ratio p.value
##  OTHERS       1.27 0.403 226   3.150  0.0019
#Contast Females 30-34 relative to all other respondents
contrast( pscore.adj2, list("Females 30-34 rel to Others" = c(-1/7, -1/7, 1, -1/7, -1/7, -1/7, -1/7, -1/7)))
##  contrast                    estimate    SE  df t.ratio p.value
##  Females 30-34 rel to Others     1.07 0.416 226   2.562  0.0111
# EQUIP_SCORE predicting PAIN_SCORE when controlled for BIRTHSEX & TRAINING_LEVEL & AGEBAND
# Should Equipment Score be the "outcome of interest"? (In other words, EQUIP_SCORE observed after controlling for Sex and Training Level ... or Age?) 

summary(lm(PAIN_SCORE ~ EQUIP_SCORE + BIRTHSEX + TRAINING_LEVEL  , data = SCORES))
## 
## Call:
## lm(formula = PAIN_SCORE ~ EQUIP_SCORE + BIRTHSEX + TRAINING_LEVEL, 
##     data = SCORES)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7113 -0.8949 -0.2231  0.6330  3.9872 
## 
## Coefficients:
##                           Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                3.19609    0.45695   6.994 3.01e-11 ***
## EQUIP_SCORE               -0.12125    0.05874  -2.064   0.0401 *  
## BIRTHSEXM                 -0.45272    0.18429  -2.456   0.0148 *  
## TRAINING_LEVELFirst Year  -0.68523    0.39597  -1.731   0.0849 .  
## TRAINING_LEVELSecond Year -0.24227    0.39517  -0.613   0.5404    
## TRAINING_LEVELThird Year  -0.48804    0.40124  -1.216   0.2251    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.352 on 225 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.07312,    Adjusted R-squared:  0.05252 
## F-statistic:  3.55 on 5 and 225 DF,  p-value: 0.004141
anova(lm(PAIN_SCORE   ~ EQUIP_SCORE + BIRTHSEX + TRAINING_LEVEL , data = SCORES))
## Analysis of Variance Table
## 
## Response: PAIN_SCORE
##                 Df Sum Sq Mean Sq F value  Pr(>F)  
## EQUIP_SCORE      1  10.72 10.7194  5.8616 0.01627 *
## BIRTHSEX         1  11.42 11.4184  6.2438 0.01318 *
## TRAINING_LEVEL   3  10.32  3.4406  1.8814 0.13353  
## Residuals      225 411.47  1.8288                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#LOGISTIC MODEL - Does higher EQUIP_SCORE reduce odds of experiencing PAIN/INJURY

PAIN2 <-
          sqldf(" select a.*,
                        b.EQUIP_SCORE,
                        b.TRNG_SCORE,
                        case when b.PAIN_SCORE = 0 then 0 else 1 end as EVER_INJ_PAIN
                 from PAIN as a left join SCORES as b on (a.RECORD_ID = b.RECORD_ID) ")

  
  everinj.equip.glm.unadj <- glm( EVER_INJ_PAIN ~ EQUIP_SCORE  , data= PAIN2 , family= binomial(link="logit"))
  everinj.equip.emmeans.unadj <- emmeans( everinj.equip.glm.unadj,  c(~EQUIP_SCORE), at = list(EQUIP_SCORE = c(1:7)), type= "response", adjust="none")
  
  
  
  everinj.equip.glm.adj <- glm( EVER_INJ_PAIN ~ EQUIP_SCORE + BIRTHSEX  , data= PAIN2 , family= binomial(link="logit"))
  everinj.equip.emmeans.adj <- emmeans( everinj.equip.glm.adj,  ~c(EQUIP_SCORE, BIRTHSEX), at = list(EQUIP_SCORE = c(1:7)), type= "response", adjust="none")
  
  everinj.trng.glm.unadj <- glm( EVER_INJ_PAIN ~ TRNG_SCORE  , data= PAIN2 , family= binomial(link="logit"))
  everinj.trng.emmeans.unadj <- emmeans( everinj.trng.glm.unadj,  c(~TRNG_SCORE), at = list(TRNG_SCORE = c(5:24)), type= "response", adjust="none")
  
  
  
  summary(everinj.equip.glm.unadj)
## 
## Call:
## glm(formula = EVER_INJ_PAIN ~ EQUIP_SCORE, family = binomial(link = "logit"), 
##     data = PAIN2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4836   0.3578   0.4175   0.4862   0.8639  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.6786     0.6947   5.295 1.19e-07 ***
## EQUIP_SCORE  -0.3206     0.1380  -2.323   0.0202 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 145.50  on 231  degrees of freedom
## Residual deviance: 140.15  on 230  degrees of freedom
##   (4 observations deleted due to missingness)
## AIC: 144.15
## 
## Number of Fisher Scoring iterations: 5
  summary(everinj.equip.emmeans.unadj)
##  EQUIP_SCORE  prob     SE  df asymp.LCL asymp.UCL
##            1 0.966 0.0184 Inf     0.905     0.989
##            2 0.954 0.0193 Inf     0.897     0.980
##            3 0.938 0.0193 Inf     0.888     0.967
##            4 0.917 0.0191 Inf     0.871     0.947
##            5 0.889 0.0227 Inf     0.836     0.926
##            6 0.853 0.0358 Inf     0.768     0.910
##            7 0.808 0.0597 Inf     0.664     0.899
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
  summary(everinj.equip.glm.adj)
## 
## Call:
## glm(formula = EVER_INJ_PAIN ~ EQUIP_SCORE + BIRTHSEX, family = binomial(link = "logit"), 
##     data = PAIN2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5356   0.3319   0.3862   0.4630   0.8822  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   3.7788     0.7177   5.265  1.4e-07 ***
## EQUIP_SCORE  -0.3025     0.1408  -2.149   0.0317 *  
## BIRTHSEXM    -0.3129     0.4771  -0.656   0.5119    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 145.50  on 231  degrees of freedom
## Residual deviance: 139.71  on 229  degrees of freedom
##   (4 observations deleted due to missingness)
## AIC: 145.71
## 
## Number of Fisher Scoring iterations: 5
  summary(everinj.equip.emmeans.adj)
##  EQUIP_SCORE BIRTHSEX  prob     SE  df asymp.LCL asymp.UCL
##            1 F        0.970 0.0175 Inf     0.909     0.991
##            2 F        0.960 0.0192 Inf     0.900     0.984
##            3 F        0.946 0.0212 Inf     0.886     0.976
##            4 F        0.929 0.0247 Inf     0.862     0.964
##            5 F        0.906 0.0324 Inf     0.821     0.953
##            6 F        0.877 0.0470 Inf     0.752     0.944
##            7 F        0.840 0.0703 Inf     0.653     0.936
##            1 M        0.959 0.0246 Inf     0.873     0.988
##            2 M        0.946 0.0261 Inf     0.865     0.979
##            3 M        0.928 0.0268 Inf     0.855     0.966
##            4 M        0.905 0.0275 Inf     0.836     0.947
##            5 M        0.876 0.0313 Inf     0.800     0.925
##            6 M        0.839 0.0434 Inf     0.735     0.907
##            7 M        0.794 0.0661 Inf     0.636     0.895
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
  summary(everinj.trng.glm.unadj)
## 
## Call:
## glm(formula = EVER_INJ_PAIN ~ TRNG_SCORE, family = binomial(link = "logit"), 
##     data = PAIN2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4195   0.3967   0.4335   0.4732   0.5819  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.16745    0.87178   3.633  0.00028 ***
## TRNG_SCORE  -0.07386    0.06568  -1.125  0.26072    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 145.09  on 229  degrees of freedom
## Residual deviance: 143.80  on 228  degrees of freedom
##   (6 observations deleted due to missingness)
## AIC: 147.8
## 
## Number of Fisher Scoring iterations: 5
  summary(everinj.trng.emmeans.unadj)
##  TRNG_SCORE  prob     SE  df asymp.LCL asymp.UCL
##           5 0.943 0.0304 Inf     0.845     0.980
##           6 0.938 0.0290 Inf     0.851     0.976
##           7 0.934 0.0273 Inf     0.856     0.971
##           8 0.929 0.0255 Inf     0.860     0.966
##           9 0.924 0.0236 Inf     0.863     0.959
##          10 0.919 0.0217 Inf     0.865     0.953
##          11 0.913 0.0202 Inf     0.865     0.946
##          12 0.907 0.0195 Inf     0.862     0.939
##          13 0.901 0.0201 Inf     0.854     0.934
##          14 0.894 0.0225 Inf     0.841     0.931
##          15 0.887 0.0267 Inf     0.823     0.930
##          16 0.879 0.0325 Inf     0.800     0.930
##          17 0.871 0.0398 Inf     0.771     0.931
##          18 0.863 0.0483 Inf     0.739     0.933
##          19 0.854 0.0579 Inf     0.702     0.935
##          20 0.844 0.0687 Inf     0.661     0.938
##          21 0.834 0.0805 Inf     0.617     0.940
##          22 0.824 0.0933 Inf     0.570     0.943
##          23 0.813 0.1073 Inf     0.522     0.945
##          24 0.801 0.1222 Inf     0.473     0.948
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
  # 
  # 
  # summary(everinj.respect.glm.adj)
  # summary(everinj.respect.emmeans.adj, adjust="none")
  # contrast(everinj.respect.emmeans.adj, list("Respect1 - Respect2" = c(-1/2, 1/2, 0, 0, 0, 0, 0, 0, 0,
  #                                                                      -1/2, 1/2, 0 ,0, 0, 0, 0, 0, 0 )))
  # contrast(everinj.respect.emmeans.adj, list("Respect5 - Respect6" = c(0, 0, 0, 0, -1/2, 1/2, 0, 0, 0,
  #                                                                      0, 0 ,0, 0, -1/2, 1/2, 0, 0 ,0)))


Question 4: What happens if we attempt to compute a “Respect Score?” Gender differences?

Nine-Point Scale, higher score indicates higher level of Respect
  • Confident Asking Nurses (Yes=1, default=0)
  • Confident Asking Assistants (Yes=1, default=0)
  • Frequency Asking Nurses (Once=1, default=0)
  • Recognized by ES Staff (Yes=1, default=0)
  • Recognized by Anesthetists (Yes=1, default=0)
  • Recognized by GI Attending (Yes=1, default=0)
  • Pains dismissed as Growing Pain (Yes=0, default=1, REVERSE SCORED)
  • Teachers train with sensitivity to stature (Yes=1, default=0)
  • First Name Used No Permission (Yes=0, default=1, REVERSE SCORED)


# CREATE RESPECT INDEX 


 
  
  RESPECT <-
    
    SURVEY %>% 
    mutate( BIRTHSEX2 = case_when( BIRTHSEX == "M" ~ 0,
                                   TRUE ~ 1 ),
            
            HEIGHT = recode_factor(HEIGHT2, "< 5'"='1', 
                                   "5-5'3" = '2',
                                   "5'4-5'6" = '3',
                                   "5'7-5'9" = '4',
                                   "5'10-6'" = '5',
                                   "6'1-6'4" = '6',
                                   "> 6'4" = '7'),
            HEIGHT = relevel(HEIGHT, ref="5"),
            
            HTCAT = recode_factor(HEIGHT2, "< 5'"='0', 
                                  "5-5'3" = '0',
                                  "5'4-5'6" = '0',
                                  "5'7-5'9" = '1',
                                  "5'10-6'" = '1',
                                  "6'1-6'4" = '1',
                                  "> 6'4" = '1'),
            
            RESPECT_NURSES = case_when (is.na(COMFORTABLE_ASKING_NURSES) ~ NA_real_,
                                        COMFORTABLE_ASKING_NURSES == 'Y' ~ 1,
                                        TRUE ~ 0 ),
            
            RESPECT_TECHS = case_when (is.na(COMFORTABLE_ASKING_TECHS) ~ NA_real_,
                                       COMFORTABLE_ASKING_TECHS == "Y" ~ 1,
                                       TRUE ~ 0),
            
            RESPECT_NFREQ = case_when (is.na(NURSES_ASKING) ~ NA_real_,
                                       NURSES_ASKING == "Once" ~ 1,
                                       TRUE ~ 0),
            
            RESPECT_ESTAFF = case_when (is.na(RECOGNIZED_RESPECTED_ES_STAFF) ~ NA_real_,
                                        RECOGNIZED_RESPECTED_ES_STAFF == "Y" ~ 1,
                                        TRUE ~ 0),
            
            RESPECT_ANES = case_when (is.na(RECOGNIZED_RESPECTED_ANESTHETISTS) ~ NA_real_,
                                      RECOGNIZED_RESPECTED_ANESTHETISTS == "Y" ~ 1,
                                      TRUE ~ 0),
            
            RESPECT_GIATT = case_when (is.na(RECOGNIZED_RESPECTED_ANESTHETISTS) ~ NA_real_,
                                       RECOGNIZED_RESPECTED_GASTRO_ATTENDING == "Y" ~ 1,
                                       TRUE ~ 0),
            
            RESPECT_GPAINS = case_when (is.na(GROWING_PAINS) ~ NA_real_,
                                        GROWING_PAINS  == "Y" ~ 0,
                                        TRUE ~ 1),
            
            RESPECT_SENSITIVITY = case_when (is.na(TEACHER_SENSITIVITY_STATURE_HANDSIZE) ~ NA_real_,
                                             TEACHER_SENSITIVITY_STATURE_HANDSIZE  == "Y" ~ 1,
                                             TRUE ~ 0),
            
            
            RESPECT_FNAME =  case_when (is.na(FIRST_NAME_NO_PERMISSION) ~ NA_real_,
                                        FIRST_NAME_NO_PERMISSION == "Y" ~ 0,
                                        TRUE ~ 1)) %>% 
    
    
    rowwise() %>% 
    mutate(RESPECT_SCORE = sum( RESPECT_NURSES + RESPECT_TECHS + RESPECT_NFREQ + RESPECT_ESTAFF + RESPECT_ANES + 
                                  RESPECT_GIATT + RESPECT_GPAINS + RESPECT_SENSITIVITY + RESPECT_FNAME)) %>% 
    
    select(RECORD_ID, BIRTHSEX, BIRTHSEX2, AGE2, RACE, RACE2, HEIGHT, HTCAT, TRAINING_LEVEL, EVER_INJURED, starts_with("RESPECT_") )
  
  
  
  RESPECT %>% 
    group_by(BIRTHSEX) %>% 
    summarize( RSPECTMEAN = mean(RESPECT_SCORE, na.rm=T),
               RSPECTSD = sd(RESPECT_SCORE, na.rm=T))
## # A tibble: 2 × 3
##   BIRTHSEX RSPECTMEAN RSPECTSD
##   <fct>         <dbl>    <dbl>
## 1 F              6.38     1.78
## 2 M              7.08     1.35
shapiro.test(RESPECT$RESPECT_SCORE)
## 
##  Shapiro-Wilk normality test
## 
## data:  RESPECT$RESPECT_SCORE
## W = 0.88243, p-value = 2.431e-12
eov.ttest(RESPECT, RESPECT_SCORE, BIRTHSEX)
## [1] "F Test p.value = 0.003502347  EOV = FALSE (Satterthwaite)"
## [1] "RESPECT : RESPECT_SCORE ~ BIRTHSEX"
## 
##  Welch Two Sample t-test
## 
## data:  RESPECT : RESPECT_SCORE ~ BIRTHSEX
## t = -3.3138, df = 200.78, p-value = 0.001092
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  -1.1146980 -0.2830084
## sample estimates:
## mean in group F mean in group M 
##        6.376147        7.075000
eov.ttest(RESPECT, RESPECT_SCORE, RACE2)
## [1] "F Test p.value = 0.6175523  EOV = TRUE (Pooled)"
## [1] "RESPECT : RESPECT_SCORE ~ RACE2"
## 
##  Two Sample t-test
## 
## data:  RESPECT : RESPECT_SCORE ~ RACE2
## t = 1.2879, df = 227, p-value = 0.1991
## alternative hypothesis: true difference in means between group WHITE and group NON-WHITE is not equal to 0
## 95 percent confidence interval:
##  -0.1462269  0.6980528
## sample estimates:
##     mean in group WHITE mean in group NON-WHITE 
##                6.898990                6.623077
ggbetweenstats( data= RESPECT,
                x = BIRTHSEX,
                y = RESPECT_SCORE,
                type="parametric",
                p.adjust.method = "none",
                title = "Mean RESPECT Scores by Birth Sex")

ggbetweenstats( data= RESPECT,
                x = RACE2,
                y = RESPECT_SCORE,
                type="parametric",
                p.adjust.method = "none",
                title = "Mean RESPECT Scores by Race (Broad)")

respect.birthsex.lm.unadj  <- lm( RESPECT_SCORE ~ BIRTHSEX , data= RESPECT)
respect.htcat.lm.unadj    <-  lm( RESPECT_SCORE ~ HTCAT , data= RESPECT)
respect.birthsex.lm.adj   <- lm( RESPECT_SCORE ~ BIRTHSEX + RACE2 , data= RESPECT)
      


  tab_model( respect.birthsex.lm.unadj,
             respect.birthsex.lm.adj,
             respect.htcat.lm.unadj,
             
             title= "Respect Score - Comparison of Three Models based on Quasi Score <br>.",
             dv.labels= c('Birth Sex Unadj','Height Cat Unadj', 'Birth Sex Adjusted Race'))
Respect Score - Comparison of Three Models based on Quasi Score
.
  Birth Sex Unadj Height Cat Unadj Birth Sex Adjusted Race
Predictors Estimates CI p Estimates CI p Estimates CI p
(Intercept) 6.38 6.08 – 6.67 <0.001 6.45 6.03 – 6.87 <0.001 6.31 5.99 – 6.62 <0.001
BIRTHSEX [M] 0.70 0.29 – 1.11 0.001 0.67 0.25 – 1.10 0.002
RACE2 [NON-WHITE] -0.11 -0.53 – 0.32 0.623
HTCAT [1] 0.75 0.33 – 1.16 <0.001
Observations 229 229 229
R2 / R2 adjusted 0.047 / 0.043 0.048 / 0.040 0.053 / 0.048
# Can one's RESPECT score have an influence on Injury/Pain reports?
  
  
  
  RESPECT2 <-
      sqldf( "select a.*,
                    case when b.PAIN_SCORE = 0 then 0 else 1 end as EVER_INJ_PAIN
              from RESPECT as a left join PAIN as b on 
              a.RECORD_ID = b.RECORD_ID ")
  
  
  
  everinj.respect.glm.unadj <- glm( EVER_INJ_PAIN ~ RESPECT_SCORE  , data= RESPECT2 , family= binomial(link="logit"))
  everinj.respect.emmeans.unadj <- emmeans( everinj.respect.glm.unadj,  c(~RESPECT_SCORE), at = list(RESPECT_SCORE = c(1:9)), type= "response", adjust="none")
  
  
  everinj.respect.glm.adj <- glm( EVER_INJ_PAIN ~ RESPECT_SCORE + BIRTHSEX  , data= RESPECT2 , family= binomial(link="logit"))
  everinj.respect.emmeans.adj <- emmeans( everinj.respect.glm.adj,  ~c(RESPECT_SCORE, BIRTHSEX), at = list(RESPECT_SCORE = c(1:9)), type= "response", adjust="none")
  
  summary(everinj.respect.glm.unadj)
## 
## Call:
## glm(formula = EVER_INJ_PAIN ~ RESPECT_SCORE, family = binomial(link = "logit"), 
##     data = RESPECT2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7193   0.3506   0.4369   0.5418   0.6677  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     5.4997     1.4848   3.704 0.000212 ***
## RESPECT_SCORE  -0.4569     0.1972  -2.317 0.020510 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 144.89  on 228  degrees of freedom
## Residual deviance: 138.16  on 227  degrees of freedom
##   (7 observations deleted due to missingness)
## AIC: 142.16
## 
## Number of Fisher Scoring iterations: 6
  summary(everinj.respect.glm.adj)
## 
## Call:
## glm(formula = EVER_INJ_PAIN ~ RESPECT_SCORE + BIRTHSEX, family = binomial(link = "logit"), 
##     data = RESPECT2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.7612   0.3254   0.4024   0.4959   0.6957  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     5.5457     1.4955   3.708 0.000209 ***
## RESPECT_SCORE  -0.4390     0.2000  -2.195 0.028195 *  
## BIRTHSEXM      -0.2997     0.4747  -0.631 0.527825    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 144.89  on 228  degrees of freedom
## Residual deviance: 137.76  on 226  degrees of freedom
##   (7 observations deleted due to missingness)
## AIC: 143.76
## 
## Number of Fisher Scoring iterations: 6
  summary(everinj.respect.emmeans.adj, adjust="none")
##  RESPECT_SCORE BIRTHSEX  prob      SE  df asymp.LCL asymp.UCL
##              1 F        0.994 0.00779 Inf     0.928     1.000
##              2 F        0.991 0.01026 Inf     0.923     0.999
##              3 F        0.986 0.01311 Inf     0.918     0.998
##              4 F        0.978 0.01616 Inf     0.911     0.995
##              5 F        0.966 0.01906 Inf     0.901     0.989
##              6 F        0.948 0.02188 Inf     0.884     0.978
##              7 F        0.922 0.02692 Inf     0.850     0.961
##              8 F        0.884 0.04111 Inf     0.777     0.944
##              9 F        0.831 0.07181 Inf     0.644     0.931
##              1 M        0.992 0.01080 Inf     0.898     0.999
##              2 M        0.987 0.01420 Inf     0.892     0.999
##              3 M        0.981 0.01809 Inf     0.886     0.997
##              4 M        0.970 0.02206 Inf     0.879     0.993
##              5 M        0.955 0.02534 Inf     0.870     0.985
##              6 M        0.932 0.02704 Inf     0.856     0.969
##              7 M        0.898 0.02834 Inf     0.827     0.942
##              8 M        0.850 0.03837 Inf     0.759     0.911
##              9 M        0.785 0.06868 Inf     0.622     0.890
## 
## Confidence level used: 0.95 
## Intervals are back-transformed from the logit scale
  contrast(everinj.respect.emmeans.adj, list("Respect1 - Respect2" = c(-1/2, 1/2, 0, 0, 0, 0, 0, 0, 0,
                                                                       -1/2, 1/2, 0 ,0, 0, 0, 0, 0, 0 )))
##  contrast            odds.ratio    SE  df null z.ratio p.value
##  Respect1 / Respect2      0.645 0.129 Inf    1  -2.195  0.0282
## 
## Tests are performed on the log odds ratio scale
  contrast(everinj.respect.emmeans.adj, list("Respect5 - Respect6" = c(0, 0, 0, 0, -1/2, 1/2, 0, 0, 0,
                                                                       0, 0 ,0, 0, -1/2, 1/2, 0, 0 ,0)))
##  contrast            odds.ratio    SE  df null z.ratio p.value
##  Respect5 / Respect6      0.645 0.129 Inf    1  -2.195  0.0282
## 
## Tests are performed on the log odds ratio scale
  #CONCLUSION
  
  # For each increment a person improves on the RESPECT_SCORE scale, the exp. odds of incurring an injury or experiencing transient pain REDUCE by ~ 35% in the model adjusted for BIRTHSEX.


Question 5: What happens if we use Factor Analysis as a means of data reduction and to confirm that the variables that we intuitively assume belong together actually load highly on the same factor?

  • Pass along virtually all non-demographic variables, converting answers to a numeric scale
  • Examine correlation matrix to highlight pairs of overly correlated variables (PCoeff >0.90). Consider dropping one of the variables
  • Examine results of the Kaiser-Meyer-Olin Factor Adequacy Test. Consider dropping any variable with a score below 0.50.
  • Examine Eigen Value chart and decide on (1) number of factors to output; (2) whether to rotate your factor solution
  • Output Factor Scores and append to base dataset so that T-tests and regressions can be run by various categorical variables (Sex, Training Level, Agecat, etc.)



FACTOR_BASE <-
  
  SURVEY %>% 
  mutate(
          HTCAT = recode_factor(HEIGHT2, "< 5'"='0', 
                                "5-5'3" = '0',
                                "5'4-5'6" = '0',
                                "5'7-5'9" = '1',
                                "5'10-6'" = '1',
                                "6'1-6'4" = '1',
                                "> 6'4" = '1'),
       
          PAIN_INJURY =  case_when(  is.na(EVER_INJURED) ~ NA_real_,
                                     EVER_INJURED == "Y" ~ 1,
                                     TRUE ~ 0),
          
          PAIN_HAND =  case_when(  is.na(EXPERIENCED_TRANSIENT_PAIN_HAND) ~ NA_real_,
                                   EXPERIENCED_TRANSIENT_PAIN_HAND == "Y" ~ 1,
                                   TRUE ~ 0),        
          PAIN_NECKSH =  case_when(  is.na(EXPERIENCED_TRANSIENT_PAIN_NECK_SHOULDER) ~ NA_real_,
                                     EXPERIENCED_TRANSIENT_PAIN_NECK_SHOULDER == "Y" ~ 1,
                                   TRUE ~ 0),        
          PAIN_BACK =  case_when(  is.na(EXPERIENCED_TRANSIENT_PAIN_BACK) ~ NA_real_,
                                   EXPERIENCED_TRANSIENT_PAIN_BACK == "Y" ~ 1,
                                   TRUE ~ 0),        
          PAIN_LEG =  case_when(  is.na(EXPERIENCED_TRANSIENT_PAIN_LEG) ~ NA_real_,
                                  EXPERIENCED_TRANSIENT_PAIN_LEG == "Y" ~ 1,
                                   TRUE ~ 0),        
          PAIN_FOOT = case_when(  is.na(EXPERIENCED_TRANSIENT_PAIN_FOOT) ~ NA_real_,
                                     EXPERIENCED_TRANSIENT_PAIN_FOOT == "Y" ~ 1,
                                      TRUE ~ 0),
          
    
         # DEMO_HOURS = case_when (is.na(PERFORMANCE_HOURS) ~ NA_real_,
         #                          PERFORMANCE_HOURS == '< 10' ~ 1,
         #                          PERFORMANCE_HOURS == '10-20' ~ 2,
         #                          PERFORMANCE_HOURS == '21-30' ~ 3,
         #                          PERFORMANCE_HOURS == '31-40' ~ 4,
         #                          TRUE ~ 5 ),

        # DEMO_FEMTEACH = case_when (is.na(FEMALE_TRAINERS) ~ NA_real_,
        #                            FEMALE_TRAINERS == '1-2' ~ 1,
        #                            FEMALE_TRAINERS == '3-5' ~ 2,
        #                            FEMALE_TRAINERS == '6-10' ~ 3,
        #                            FEMALE_TRAINERS == '> 10' ~ 4,
        #                            TRUE ~ 0 ),
        # 
        # DEMO_MALETEACH = case_when (is.na(MALE_TRAINERS) ~ NA_real_,
        #                             MALE_TRAINERS == '1-2' ~ 1,
        #                             MALE_TRAINERS == '3-5' ~ 2,
        #                             MALE_TRAINERS == '6-10' ~ 3,
        #                             MALE_TRAINERS == '> 10' ~ 4,
        #                            TRUE ~ 0 ),
        # 
        #   DEMO_SGPREF = case_when (is.na(TEACHER_GENDER_PREFERENCE) ~ NA_real_,
        #                           TEACHER_GENDER_PREFERENCE == 'Yes' ~ 1,
        #                           TRUE ~ 0),

         # DEMO_GLOVESIZE = GLOVE,

          
          
        
          
          #Higher numbers equal higher levels of training/attention
          
          TRNG_FORMAL = case_when (is.na(FELLOWSHIP_FORMAL_ERGO_TRAINING) ~ NA_real_,
                                   FELLOWSHIP_FORMAL_ERGO_TRAINING == 'Y' ~ 1,
                                   TRUE ~ 0 ),
          
          
          TRNG_INFORMAL = case_when (is.na(INFORMAL_TRAINING) ~ NA_real_,
                                   INFORMAL_TRAINING == 'Y' ~ 1,
                                   TRUE ~ 0 ),
          
          
          TRNG_POSTURAL = case_when (is.na(TRAINING_TECHNIQUES_POSTURAL) ~ NA_real_,
                                     TRAINING_TECHNIQUES_POSTURAL == "Y" ~ 1,
                                     TRUE ~ 0),
          
          TRNG_BEDHT = case_when (is.na(TRAINING_TECHNIQUES_BEDHEIGHT) ~ NA_real_,
                                  TRAINING_TECHNIQUES_BEDHEIGHT == "Y" ~ 1,
                                  TRUE ~ 0),
          
          TRNG_BEDANG = case_when (is.na(TRAINING_TECHNIQUES_BEDANGLE) ~ NA_real_,
                                   TRAINING_TECHNIQUES_BEDANGLE == "Y" ~ 1,
                                   TRUE ~ 0),
          
          TRNG_MONITOR = case_when (is.na(TRAINING_TECHNIQUES_MONITORHEIGHT) ~ NA_real_,
                                    TRAINING_TECHNIQUES_MONITORHEIGHT == "Y" ~ 1,
                                    TRUE ~ 0),
          
          TRNG_MANEUVERS = case_when (is.na(TRAINING_TECHNIQUES_MUSCULOSKELETAL) ~ NA_real_,
                                      TRAINING_TECHNIQUES_MUSCULOSKELETAL == "Y" ~ 1,
                                      TRUE ~ 0) ,
          
          TRNG_EXERCISE = case_when (is.na(TRAINING_TECHNIQUES_EXERCISE_STRETCHING) ~ NA_real_,
                                     TRAINING_TECHNIQUES_EXERCISE_STRETCHING == "Y" ~ 1,
                                     TRUE ~ 0),
          
          TRNG_DIALEXT= case_when (is.na(TRAINING_TECHNIQUES_DIAL_EXTENDERS) ~ NA_real_,
                                   TRAINING_TECHNIQUES_DIAL_EXTENDERS == "Y" ~ 1,
                                   TRUE ~ 0),
          
          TRNG_PEDISCOPE= case_when (is.na(TRAINING_TECHNIQUES_PEDIATRIC_COLONOSCOPE) ~ NA_real_,
                                     TRAINING_TECHNIQUES_PEDIATRIC_COLONOSCOPE == "Y" ~ 1,
                                     TRUE ~ 0),
          
          TRNG_FEEDBACK= case_when (is.na(ERGO_FEEDBACK) ~ NA_real_,
                                    ERGO_FEEDBACK == "Often" ~ 3,
                                    ERGO_FEEDBACK == "Sometimes" ~ 2,
                                    ERGO_FEEDBACK == "Rarely" ~ 1,
                                    TRUE ~ 0),
          
          TRNG_OPTIM= case_when (is.na(ERGO_OPTIMIZATION) ~ NA_real_,
                                 ERGO_OPTIMIZATION == "Y" ~ 1,
                                 ERGO_OPTIMIZATION == "N" ~ 0,
                                 TRUE ~ 0),
          
          
          #Higher numbers equal heightened equipment availability
          
          EQUIP_GLOVE_AVAIL = case_when (is.na(GLOVE_SIZE_AVAILABLE) ~ NA_real_,
                                        GLOVE_SIZE_AVAILABLE == "Y" ~ 1,
                                        TRUE ~ 0),
          
          EQUIP_DIALEXT_AVAIL= case_when (is.na(DIAL_EXTENDERS_AVAILABLE) ~ NA_real_,
                                         DIAL_EXTENDERS_AVAILABLE == "Y" ~ 1,
                                         TRUE ~ 0),
          
          EQUIP_DIALEXT_ENC= case_when (is.na(DIAL_EXTENDERS_ENCOURAGED) ~ NA_real_,
                                       DIAL_EXTENDERS_ENCOURAGED == "Y" ~ 1,
                                       TRUE ~ 0),
          
          # EQUIP_PEDISCOPE_AVAIL = case_when (is.na(PEDI_COLONOSCOPES_AVAILABLE) ~ NA_real_,
          #                                   PEDI_COLONOSCOPES_AVAILABLE == "Y" ~ 1,
          #                                   TRUE ~ 0),
          
          EQUIP_LAPRON_AVAIL = case_when (is.na(LEAD_APRONS_AVAILABLE) ~ NA_real_,
                                          LEAD_APRONS_AVAILABLE == "Y" ~ 2,
                                          LEAD_APRONS_AVAILABLE == "N" ~ 1,
                                             TRUE ~ 0),
          # EQUIP_TO_FORMAL= case_when (is.na(ERGO_FORMAL_TIMEOUT_PRIOR) ~ NA_real_,
          #                            ERGO_FORMAL_TIMEOUT_PRIOR == "Y" ~ 1,
          #                            TRUE ~ 0),

          EQUIP_TO_INFORMAL= case_when (is.na(ERGO_INFORMAL_TIMEOUT_PRIOR) ~ NA_real_,
                                       ERGO_INFORMAL_TIMEOUT_PRIOR == "Y" ~ 1,
                                       TRUE ~ 0),
          
          EQUIP_MONITORS_EASYADJ = case_when (is.na(MONITORS_ADJUSTABLE) ~ NA_real_,
                                             MONITORS_ADJUSTABLE == "Y" ~ 1,
                                             TRUE ~ 0),
          
          
          TRNG_TACTILE_MALES = case_when (is.na(TACTILE_INSTRUCTION_FROM_MALES) ~ NA_real_,
                                          TACTILE_INSTRUCTION_FROM_MALES == "Often" ~ 2,
                                          TACTILE_INSTRUCTION_FROM_MALES == "Rarely" ~ 1,
                                          TRUE ~ 0),

          TRNG_TACTILE_FEMALES = case_when (is.na(TACTILE_INSTRUCTION_FROM_FEMALES) ~ NA_real_,
                                            TACTILE_INSTRUCTION_FROM_FEMALES == "Often" ~ 2,
                                            TACTILE_INSTRUCTION_FROM_FEMALES == "Rarely" ~ 1,
                                            TRUE ~ 0),

          
          
          # Higher numbers indicated heightened levels of respect
          
          RESPECT_NURSES = case_when (is.na(COMFORTABLE_ASKING_NURSES) ~ NA_real_,
                                      COMFORTABLE_ASKING_NURSES == 'Y' ~ 1,
                                      TRUE ~ 0 ),
          
          RESPECT_TECHS = case_when (is.na(COMFORTABLE_ASKING_TECHS) ~ NA_real_,
                                     COMFORTABLE_ASKING_TECHS == "Y" ~ 1,
                                     TRUE ~ 0),
          
          # RESPECT_NFREQ = case_when (is.na(NURSES_ASKING) ~ NA_real_,
          #                            NURSES_ASKING == "Once" ~ 2,
          #                            NURSES_ASKING == "Twice" ~ 1,
          #                            TRUE ~ 0),
          
          RESPECT_ESTAFF = case_when (is.na(RECOGNIZED_RESPECTED_ES_STAFF) ~ NA_real_,
                                      RECOGNIZED_RESPECTED_ES_STAFF == "Y" ~ 1,
                                      TRUE ~ 0),
          
          RESPECT_ANES = case_when (is.na(RECOGNIZED_RESPECTED_ANESTHETISTS) ~ NA_real_,
                                    RECOGNIZED_RESPECTED_ANESTHETISTS == "Y" ~ 1,
                                    TRUE ~ 0),
          
          RESPECT_GIATT = case_when (is.na(RECOGNIZED_RESPECTED_ANESTHETISTS) ~ NA_real_,
                                     RECOGNIZED_RESPECTED_GASTRO_ATTENDING == "Y" ~ 1,
                                     TRUE ~ 0),
          
          RESPECT_GPAINS = case_when (is.na(GROWING_PAINS) ~ NA_real_,
                                      GROWING_PAINS  == "Y" ~ 0,
                                      TRUE ~ 1),
          
          TRNG_SENSITIVITY = case_when (is.na(TEACHER_SENSITIVITY_STATURE_HANDSIZE) ~ NA_real_,
                                           TEACHER_SENSITIVITY_STATURE_HANDSIZE  == "Y" ~ 1,
                                           TRUE ~ 0),
          
          
          RESPECT_FNAME =  case_when (is.na(FIRST_NAME_NO_PERMISSION) ~ NA_real_,
                                      FIRST_NAME_NO_PERMISSION == "Y" ~ 0,
                                      TRUE ~ 1))  %>% 
  
  
  #Mean Substitute missing values FOR EACH SEX
# 
# group_by( BIRTHSEX, RACE2, TRAINING_LEVEL ) %>%
# mutate_at(vars( DEMO_HOURS:RESPECT_FNAME), ~replace_na(., mean(., na.rm = TRUE))) %>%

  
  select( RECORD_ID, BIRTHSEX, RACE2, TRAINING_LEVEL, AGE2, HEIGHT2, HTCAT, 
          starts_with('DEMO_'), starts_with('TRNG_'),  starts_with('EQUIP_'), starts_with('RESPECT_'), starts_with('PAIN_')) %>% 
  
  na.omit()  %>%   left_join(CORRECT[, c("RECORD_ID", "COUNT_CORRECT")], by= 'RECORD_ID')  %>% rename( ERGO_QUIZ = COUNT_CORRECT)  %>%
                   left_join(APRON_SCORE[, c("RECORD_ID", "APRON_SCORE")], by= 'RECORD_ID')  %>% rename( EQUIP_APRONS_CT = APRON_SCORE)


Here’s a glimpse of the structure of the resulting dataset FACTOR_BASE:

glimpse(FACTOR_BASE)
## Rows: 225
## Columns: 43
## $ RECORD_ID              <dbl> 1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 12, 13, 14, 15,…
## $ BIRTHSEX               <fct> F, F, F, M, F, F, F, F, M, M, F, F, F, M, M, F,…
## $ RACE2                  <fct> NON-WHITE, WHITE, WHITE, NON-WHITE, NON-WHITE, …
## $ TRAINING_LEVEL         <ord> Third Year, Third Year, First Year, Advanced, S…
## $ AGE2                   <fct> 30-34, 30-34, 30-34, 30-34, 30-34, 30-34, 30-34…
## $ HEIGHT2                <fct> 5'4-5'6, 5'4-5'6, 5'4-5'6, 6'1-6'4, 5'7-5'9, 5-…
## $ HTCAT                  <fct> 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0,…
## $ TRNG_FORMAL            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1,…
## $ TRNG_INFORMAL          <dbl> 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1,…
## $ TRNG_POSTURAL          <dbl> 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1,…
## $ TRNG_BEDHT             <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ TRNG_BEDANG            <dbl> 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1,…
## $ TRNG_MONITOR           <dbl> 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1,…
## $ TRNG_MANEUVERS         <dbl> 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1,…
## $ TRNG_EXERCISE          <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,…
## $ TRNG_DIALEXT           <dbl> 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1,…
## $ TRNG_PEDISCOPE         <dbl> 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1,…
## $ TRNG_FEEDBACK          <dbl> 2, 1, 2, 1, 1, 2, 2, 2, 2, 1, 3, 2, 1, 1, 1, 2,…
## $ TRNG_OPTIM             <dbl> 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0,…
## $ TRNG_TACTILE_MALES     <dbl> 2, 0, 0, 0, 0, 0, 2, 2, 2, 0, 1, 1, 1, 1, 1, 1,…
## $ TRNG_TACTILE_FEMALES   <dbl> 0, 0, 0, 1, 1, 0, 2, 2, 2, 0, 1, 1, 0, 1, 1, 2,…
## $ TRNG_SENSITIVITY       <dbl> 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 1,…
## $ EQUIP_GLOVE_AVAIL      <dbl> 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1,…
## $ EQUIP_DIALEXT_AVAIL    <dbl> 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ EQUIP_DIALEXT_ENC      <dbl> 0, 0, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ EQUIP_LAPRON_AVAIL     <dbl> 2, 2, 0, 2, 0, 1, 0, 1, 2, 2, 0, 0, 1, 2, 2, 2,…
## $ EQUIP_TO_INFORMAL      <dbl> 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1,…
## $ EQUIP_MONITORS_EASYADJ <dbl> 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 1,…
## $ RESPECT_NURSES         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,…
## $ RESPECT_TECHS          <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1,…
## $ RESPECT_ESTAFF         <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,…
## $ RESPECT_ANES           <dbl> 1, 1, 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0,…
## $ RESPECT_GIATT          <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
## $ RESPECT_GPAINS         <dbl> 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1,…
## $ RESPECT_FNAME          <dbl> 1, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1,…
## $ PAIN_INJURY            <dbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ PAIN_HAND              <dbl> 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1,…
## $ PAIN_NECKSH            <dbl> 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1,…
## $ PAIN_BACK              <dbl> 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1,…
## $ PAIN_LEG               <dbl> 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,…
## $ PAIN_FOOT              <dbl> 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0,…
## $ ERGO_QUIZ              <dbl> 0, 4, 4, 3, 3, 3, 3, 5, 4, 1, 3, 5, 3, 2, 2, 5,…
## $ EQUIP_APRONS_CT        <dbl> 2, 3, 0, 6, 0, 4, 0, 4, 3, 6, 0, 0, 1, 1, 1, 5,…



correlations <- round(cor(FACTOR_BASE[, -c(1:7)]),2)
corr_matrix <- as.data.frame(as.table(correlations))
corr_matrix %>% arrange(desc(Freq)) %>% rename(PCoeff= Freq) %>% filter(PCoeff !=1 & abs(PCoeff) > 0.90)
## [1] Var1   Var2   PCoeff
## <0 rows> (or 0-length row.names)
X <- na.omit(FACTOR_BASE[, -c(1:7)])

# summary(lm( ERGO_QUIZ ~ ., data=X ))
# vif(lm( ERGO_QUIZ ~ ., X))


#Kaiser-Meyer-Olkin factor adequacy
# Variables < 0.50 should be eliminated from analysis
# Score north of 0.70 is ideal; above 0.60 is acceptable
  
KMO( X)
## Kaiser-Meyer-Olkin factor adequacy
## Call: KMO(r = X)
## Overall MSA =  0.66
## MSA for each item = 
##            TRNG_FORMAL          TRNG_INFORMAL          TRNG_POSTURAL 
##                   0.64                   0.71                   0.58 
##             TRNG_BEDHT            TRNG_BEDANG           TRNG_MONITOR 
##                   0.69                   0.85                   0.69 
##         TRNG_MANEUVERS          TRNG_EXERCISE           TRNG_DIALEXT 
##                   0.77                   0.71                   0.65 
##         TRNG_PEDISCOPE          TRNG_FEEDBACK             TRNG_OPTIM 
##                   0.69                   0.80                   0.71 
##     TRNG_TACTILE_MALES   TRNG_TACTILE_FEMALES       TRNG_SENSITIVITY 
##                   0.55                   0.54                   0.71 
##      EQUIP_GLOVE_AVAIL    EQUIP_DIALEXT_AVAIL      EQUIP_DIALEXT_ENC 
##                   0.63                   0.61                   0.64 
##     EQUIP_LAPRON_AVAIL      EQUIP_TO_INFORMAL EQUIP_MONITORS_EASYADJ 
##                   0.57                   0.65                   0.63 
##         RESPECT_NURSES          RESPECT_TECHS         RESPECT_ESTAFF 
##                   0.65                   0.65                   0.67 
##           RESPECT_ANES          RESPECT_GIATT         RESPECT_GPAINS 
##                   0.74                   0.65                   0.66 
##          RESPECT_FNAME            PAIN_INJURY              PAIN_HAND 
##                   0.80                   0.62                   0.60 
##            PAIN_NECKSH              PAIN_BACK               PAIN_LEG 
##                   0.75                   0.57                   0.65 
##              PAIN_FOOT              ERGO_QUIZ        EQUIP_APRONS_CT 
##                   0.63                   0.53                   0.62
remove<- colnames(X[ , KMO(X)$MSAi<0.5])
print(remove)
## character(0)
# Bartlett's Test for Sphericity - If you can't reject this null hypothesis, then there is essentially nothing to factor, as all variables are essentially different. 
# In other words, you want the analysis of your data matrix to have a resultant p value less than 0.05

psych::cortest.bartlett( X )
## R was not square, finding R from data
## $chisq
## [1] 2034.94
## 
## $p.value
## [1] 2.113553e-147
## 
## $df
## [1] 630


  • Conclusion: Questions that were targeted only to a subset of respondents (e.g., items around Pregnancy) were eliminated from the final FACTOR_BASE dataset. While some varialbes exhibited a high degree of correlation, none exceeded the 0.90 threshold. However, results of the Kaiser-Meyer-Olin test (KMO) suggested that the following variables should not be part of the factor analysis, as each had a KMO score below 0.50: Availability of *Pedioscopes, Formal Ergo Timeouts, Times Needed to Get Nurses’ Attention,

  • The final FACTOR_BASE dataset contains 36 Questions related to Training, Equipment Availability, Pain/Injury Sustained, as well as two composite variables: ERGO_QUIZ (a variable capturing the number of questions each repsonded answered correctly in the Ergonomic Knowledge section of the survey); and (2) EQUIP_APRON_CT (a variable capturing the number of specialty aprons reponsdents indicated were available at their institutions).

# What does R recommend for the baseline number of Factors?
fa.parallel(X)

## Parallel analysis suggests that the number of factors =  9  and the number of components =  7
ev <- eigen(cor(X))
ev$values
##  [1] 4.2771370 2.7111582 2.4569741 1.9384490 1.8619515 1.5690384 1.4744441
##  [8] 1.4089819 1.2774367 1.2245165 1.1189824 1.0374800 0.9826368 0.9193905
## [15] 0.8908376 0.8352843 0.7843136 0.7744779 0.7220664 0.6916836 0.6864547
## [22] 0.6353313 0.5910143 0.5860888 0.5782687 0.5402646 0.4960308 0.4755599
## [29] 0.4406524 0.3926614 0.3669384 0.3415652 0.2925285 0.2777567 0.2195029
## [36] 0.1221410
nfactors <- rep(1:36)

Eigen_Values <- ev$values
Scree <- data.frame(nfactors, Eigen_Values)

ggplot(data = Scree, mapping = aes(x = nfactors, y = Eigen_Values)) +
  geom_point() +
  geom_line() +
  scale_y_continuous(name = "Eigen Values", limits = c(0,4)) +
  scale_x_continuous(name= "Number of Factors", breaks = scales::breaks_width(1))+
  geom_vline(xintercept= 5, linetype='solid', col = 'red')+
  geom_vline(xintercept= 9, linetype='solid', col = 'darkred')+
  theme_bw() +
  theme(panel.grid.major.y = element_line(color = "darkgrey")) +
  ggtitle("Scree Plot")


  • Conclusion: Although R recommends using nine factors, we can run multiple factor analyses to find the number that (1) explains the highest percentage of variance in our data and, even more important, (2) is easily interpretable. Nine factors may be the default, but the Scree plot makes exploring options with 5 to 8 factors reasonable. Also the first 12 factors have Eigen Values above 1.0, so exploring more factors may make sense. (More factors may make interpreting the loadings more difficult, however.)



FACTANAL.NOROT <- factanal( X, factors=5, scores = c("regression"), rotation = "none", cutoff=0.3)
print(FACTANAL.NOROT$loadings,sort=TRUE, cutoff = .35)
## 
## Loadings:
##                        Factor1 Factor2 Factor3 Factor4 Factor5
## RESPECT_NURSES          0.818                                 
## RESPECT_TECHS           0.706                                 
## RESPECT_ESTAFF          0.587                                 
## TRNG_TACTILE_MALES              0.907                         
## TRNG_TACTILE_FEMALES            0.895                         
## TRNG_INFORMAL                           0.504                 
## TRNG_DIALEXT                                    0.672         
## EQUIP_DIALEXT_AVAIL                             0.758         
## EQUIP_DIALEXT_ENC                               0.647         
## TRNG_FORMAL                                                   
## TRNG_POSTURAL                           0.459                 
## TRNG_BEDHT                                                    
## TRNG_BEDANG                                                   
## TRNG_MONITOR                            0.429                 
## TRNG_MANEUVERS                                                
## TRNG_EXERCISE                                                 
## TRNG_PEDISCOPE                                                
## TRNG_FEEDBACK                   0.414   0.362                 
## TRNG_OPTIM                                                    
## TRNG_SENSITIVITY                        0.395                 
## EQUIP_GLOVE_AVAIL                                             
## EQUIP_LAPRON_AVAIL                                            
## EQUIP_TO_INFORMAL                                             
## EQUIP_MONITORS_EASYADJ                                        
## RESPECT_ANES            0.360                                 
## RESPECT_GIATT                                                 
## RESPECT_GPAINS                                                
## RESPECT_FNAME           0.356                                 
## PAIN_INJURY                                                   
## PAIN_HAND                                                     
## PAIN_NECKSH                                             0.473 
## PAIN_BACK                                               0.361 
## PAIN_LEG                                                0.467 
## PAIN_FOOT                                                     
## ERGO_QUIZ                                                     
## EQUIP_APRONS_CT                                               
## 
##                Factor1 Factor2 Factor3 Factor4 Factor5
## SS loadings      2.634   2.411   1.980   1.925   1.355
## Proportion Var   0.073   0.067   0.055   0.053   0.038
## Cumulative Var   0.073   0.140   0.195   0.249   0.286
FACTANAL.VARIMAX <- factanal( X, factors=7, scores = c("regression"), rotation = "varimax", cutoff=0.3)
print(FACTANAL.VARIMAX$loadings,sort=TRUE, cutoff = .35)
## 
## Loadings:
##                        Factor1 Factor2 Factor3 Factor4 Factor5 Factor6 Factor7
## TRNG_INFORMAL           0.528                                                 
## TRNG_POSTURAL           0.534                                                 
## TRNG_FEEDBACK           0.507                                                 
## TRNG_TACTILE_MALES              0.932                                         
## TRNG_TACTILE_FEMALES            0.881                                         
## TRNG_DIALEXT                            0.675                                 
## EQUIP_DIALEXT_AVAIL                     0.828                                 
## EQUIP_DIALEXT_ENC                       0.660                                 
## PAIN_NECKSH                                     0.592                         
## PAIN_LEG                                        0.569                         
## RESPECT_NURSES                                          0.810                 
## RESPECT_TECHS                                           0.779                 
## RESPECT_ESTAFF                                                  0.949         
## EQUIP_LAPRON_AVAIL                                                      0.776 
## EQUIP_APRONS_CT                                                         0.686 
## TRNG_FORMAL                                                                   
## TRNG_BEDHT              0.377                                                 
## TRNG_BEDANG             0.401                                                 
## TRNG_MONITOR            0.476                                                 
## TRNG_MANEUVERS          0.488                                                 
## TRNG_EXERCISE                                                                 
## TRNG_PEDISCOPE                                                                
## TRNG_OPTIM                                                                    
## TRNG_SENSITIVITY        0.359                                                 
## EQUIP_GLOVE_AVAIL                                                             
## EQUIP_TO_INFORMAL       0.396                                                 
## EQUIP_MONITORS_EASYADJ                                                        
## RESPECT_ANES                                                    0.381         
## RESPECT_GIATT                                                                 
## RESPECT_GPAINS                                                                
## RESPECT_FNAME                                                                 
## PAIN_INJURY                                                                   
## PAIN_HAND                                                                     
## PAIN_BACK                                       0.405                         
## PAIN_FOOT                                       0.391                         
## ERGO_QUIZ                                                                     
## 
##                Factor1 Factor2 Factor3 Factor4 Factor5 Factor6 Factor7
## SS loadings      2.486   1.916   1.857   1.694   1.603   1.539   1.326
## Proportion Var   0.069   0.053   0.052   0.047   0.045   0.043   0.037
## Cumulative Var   0.069   0.122   0.174   0.221   0.265   0.308   0.345


  • Conclusion: Here are two sample factor analyses: one that is unrotated based on a 5-factor solution that explains 29% of the variance; and another that uses a Varimax rotation based on a 7-factor solution that explains 35% of the variance. Since our variables did not display a high degree of pairwise correlation, I have opted to use the unroted solution. Although it explains less of the data’s variance, I prefer that it loads items related to Respect on the first factor, an area of keen interest in this study.



FACTANAL.NOROT5 <- cbind( FACTOR_BASE, FACTANAL.NOROT$scores ) %>%
  rename( F1.RESPECT = Factor1,
          F2.TACTTRNG = Factor2,
          F3.ERGOTRNG = Factor3,
          F4.DIALEXT = Factor4,
          F5.PAIN = Factor5) %>% 
          mutate(TRAINING_LEVEL =  factor(TRAINING_LEVEL, ordered = F),
                 TRAINING_LEVEL = relevel(TRAINING_LEVEL, ref= "Advanced"))




# Confirm factors are totally uncorrelated
pairs.panels( FACTANAL.NOROT5[, c(44:48)] , pch=21, stars=T) 

#Test Factor Results to see whether there are any gender differences


eov.ttest( FACTANAL.NOROT5, F1.RESPECT, BIRTHSEX)
## [1] "F Test p.value = 0.00764274  EOV = FALSE (Satterthwaite)"
## [1] "FACTANAL.NOROT5 : F1.RESPECT ~ BIRTHSEX"
## 
##  Welch Two Sample t-test
## 
## data:  FACTANAL.NOROT5 : F1.RESPECT ~ BIRTHSEX
## t = -2.4894, df = 197.36, p-value = 0.01362
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  -0.55729332 -0.06461834
## sample estimates:
## mean in group F mean in group M 
##      -0.1644611       0.1464947
eov.ttest( FACTANAL.NOROT5, F2.TACTTRNG, BIRTHSEX)
## [1] "F Test p.value = 0.7942294  EOV = TRUE (Pooled)"
## [1] "FACTANAL.NOROT5 : F2.TACTTRNG ~ BIRTHSEX"
## 
##  Two Sample t-test
## 
## data:  FACTANAL.NOROT5 : F2.TACTTRNG ~ BIRTHSEX
## t = 0.82623, df = 223, p-value = 0.4096
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  -0.1469286  0.3590828
## sample estimates:
## mean in group F mean in group M 
##      0.05610297     -0.04997408
eov.ttest( FACTANAL.NOROT5, F3.ERGOTRNG, BIRTHSEX)
## [1] "F Test p.value = 0.8003622  EOV = TRUE (Pooled)"
## [1] "FACTANAL.NOROT5 : F3.ERGOTRNG ~ BIRTHSEX"
## 
##  Two Sample t-test
## 
## data:  FACTANAL.NOROT5 : F3.ERGOTRNG ~ BIRTHSEX
## t = -1.0296, df = 223, p-value = 0.3043
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  -0.3470836  0.1088660
## sample estimates:
## mean in group F mean in group M 
##     -0.06299534      0.05611350
eov.ttest( FACTANAL.NOROT5, F4.DIALEXT, BIRTHSEX)
## [1] "F Test p.value = 0.0002035871  EOV = FALSE (Satterthwaite)"
## [1] "FACTANAL.NOROT5 : F4.DIALEXT ~ BIRTHSEX"
## 
##  Welch Two Sample t-test
## 
## data:  FACTANAL.NOROT5 : F4.DIALEXT ~ BIRTHSEX
## t = 2.6885, df = 185.57, p-value = 0.00783
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  0.08565658 0.55789463
## sample estimates:
## mean in group F mean in group M 
##       0.1701835      -0.1515921
eov.ttest( FACTANAL.NOROT5, F5.PAIN, BIRTHSEX)
## [1] "F Test p.value = 0.9219934  EOV = TRUE (Pooled)"
## [1] "FACTANAL.NOROT5 : F5.PAIN ~ BIRTHSEX"
## 
##  Two Sample t-test
## 
## data:  FACTANAL.NOROT5 : F5.PAIN ~ BIRTHSEX
## t = 2.6993, df = 223, p-value = 0.007481
## alternative hypothesis: true difference in means between group F and group M is not equal to 0
## 95 percent confidence interval:
##  0.0774794 0.4965846
## sample estimates:
## mean in group F mean in group M 
##        0.151808       -0.135224
Y <-
  FACTANAL.NOROT5 %>% 
  pivot_longer( cols= F1.RESPECT : F5.PAIN,
                names_to = "FACTOR",
                values_to = "FACTOR.SCORES")  %>% select( RECORD_ID, BIRTHSEX:HTCAT, FACTOR, FACTOR.SCORES)


grouped_ggbetweenstats(
  data = Y,
  x = BIRTHSEX,
  y = FACTOR.SCORES,
  grouping.var = FACTOR,)

#Simple Linear Regressions to see whether significance survives controlling for Race (binary values) & Training Level

fnorot5.lm1 <- lm(F1.RESPECT ~ BIRTHSEX + TRAINING_LEVEL + RACE2, data = FACTANAL.NOROT5 )
fnorot5.lm4 <- lm(F4.DIALEXT ~ BIRTHSEX + TRAINING_LEVEL + RACE2 , data = FACTANAL.NOROT5)
fnorot5.lm5 <- lm(F5.PAIN ~ BIRTHSEX + TRAINING_LEVEL + RACE2, data = FACTANAL.NOROT5)
fnorot5.lm1h <- lm( F1.RESPECT ~ HTCAT + TRAINING_LEVEL + RACE2 , data = FACTANAL.NOROT5 )

tab_model( fnorot5.lm1,
           fnorot5.lm4,
           fnorot5.lm5,
           fnorot5.lm1h, show.ci=F, show.se=T)
  F1.RESPECT F4.DIALEXT F5.PAIN F1.RESPECT
Predictors Estimates std. Error p Estimates std. Error p Estimates std. Error p Estimates std. Error p
(Intercept) -0.15 0.27 0.590 0.10 0.26 0.694 0.17 0.24 0.461 -0.16 0.28 0.563
BIRTHSEX [M] 0.26 0.13 0.041 -0.28 0.12 0.024 -0.29 0.11 0.009
TRAINING LEVEL [First
Year]
-0.15 0.27 0.569 0.03 0.26 0.913 -0.26 0.23 0.258 -0.17 0.27 0.519
TRAINING LEVEL [Second
Year]
0.01 0.27 0.981 0.12 0.26 0.648 -0.02 0.23 0.924 0.03 0.27 0.918
TRAINING LEVEL [Third
Year]
0.24 0.27 0.377 -0.14 0.26 0.603 0.03 0.23 0.894 0.24 0.27 0.368
RACE2 [NON-WHITE] -0.02 0.13 0.897 0.06 0.12 0.616 0.11 0.11 0.307 -0.03 0.13 0.819
HTCAT [1] 0.26 0.13 0.042
Observations 225 225 225 225
R2 / R2 adjusted 0.055 / 0.033 0.046 / 0.024 0.059 / 0.038 0.055 / 0.033


  • Conclusion: Three of our five factors exhibited significant differences along the BIRTHSEX variable: Factor1-Respect, Factor4-DialExt, and Factor5-Pain/Injury. Moreover, when modeled in simple linear regressions and controlled for both RACE and TRAINING_LEVEL, each factor remained significant above the p= 0.05 level. (Factor5-Pain/Injury was most significant at p= 0.009.)

  • Another interesting finding here is that the HTCAT variable (which divides the sample into those above and below the 5’7” cutoff) behaves almost identically on the Respect factor to Birthsex, which is a result we’ve seen elsewhere. This could further bolster the claim that when it comes to feelings of disrespect in the Endoscopy Suite, height may be just as important a consideration as sex.



#Test Each factor for consistency



f1 <- X[ , c("RESPECT_NURSES", "RESPECT_TECHS", "RESPECT_ESTAFF", "RESPECT_ANES", "RESPECT_FNAME")]
f1a<- alpha(f1, check.keys=T)
alpha(f1, check.keys=T)
## 
## Reliability analysis   
## Call: alpha(x = f1, check.keys = T)
## 
##   raw_alpha std.alpha G6(smc) average_r S/N   ase mean   sd median_r
##        0.7      0.73    0.74      0.36 2.8 0.032 0.85 0.24     0.28
## 
##     95% confidence boundaries 
##          lower alpha upper
## Feldt     0.64   0.7  0.76
## Duhachek  0.64   0.7  0.76
## 
##  Reliability if an item is dropped:
##                raw_alpha std.alpha G6(smc) average_r S/N alpha se  var.r med.r
## RESPECT_NURSES      0.61      0.64    0.58      0.31 1.8    0.041 0.0059  0.28
## RESPECT_TECHS       0.63      0.66    0.62      0.33 2.0    0.041 0.0141  0.28
## RESPECT_ESTAFF      0.62      0.66    0.65      0.32 1.9    0.042 0.0348  0.26
## RESPECT_ANES        0.68      0.72    0.71      0.39 2.6    0.036 0.0356  0.33
## RESPECT_FNAME       0.73      0.75    0.74      0.43 3.0    0.031 0.0287  0.40
## 
##  Item statistics 
##                  n raw.r std.r r.cor r.drop mean   sd
## RESPECT_NURSES 225  0.73  0.77  0.76   0.56 0.89 0.31
## RESPECT_TECHS  225  0.70  0.74  0.70   0.53 0.89 0.31
## RESPECT_ESTAFF 225  0.71  0.75  0.67   0.57 0.92 0.27
## RESPECT_ANES   225  0.67  0.64  0.48   0.41 0.80 0.40
## RESPECT_FNAME  225  0.65  0.58  0.38   0.34 0.72 0.45
## 
## Non missing response frequency for each item
##                   0    1 miss
## RESPECT_NURSES 0.11 0.89    0
## RESPECT_TECHS  0.11 0.89    0
## RESPECT_ESTAFF 0.08 0.92    0
## RESPECT_ANES   0.20 0.80    0
## RESPECT_FNAME  0.28 0.72    0
f1a$total$std.alph
## [1] 0.7344984
f2 <- X[ , c("TRNG_TACTILE_MALES", "TRNG_TACTILE_FEMALES", "TRNG_FEEDBACK")]
f2a<- alpha(f2, check.keys=T)

f3 <- X[ , c("TRNG_INFORMAL","TRNG_POSTURAL","TRNG_MONITOR", "TRNG_SENSITIVITY")]
f3a<- alpha(f3, check.keys=T)


f4 <- X[ , c("TRNG_DIALEXT","EQUIP_DIALEXT_AVAIL","EQUIP_DIALEXT_ENC")]
f4a<- alpha(f4, check.keys=T)

f5 <- X[ , c("PAIN_NECKSH","PAIN_LEG", "PAIN_BACK")]
f5a<- alpha(f5, check.keys=T)



ta<- alpha(X, check.keys=T)


# Cronbach's Alphas for all five factors and for the overall dataset

rbind(F1.Respect.alpha=f1a$total$std.alpha, 
      F2.TactTrng.alpha=f2a$total$std.alpha, 
      F3.ErgoTrng.alpha=f3a$total$std.alpha,
      F4.DialExt.alpha=f4a$total$std.alpha, 
      F5.Pain.alpha=f5a$total$std.alpha, 
      Total.alpha= ta$total$std.alpha)
##                        [,1]
## F1.Respect.alpha  0.7344984
## F2.TactTrng.alpha 0.7505630
## F3.ErgoTrng.alpha 0.5201925
## F4.DialExt.alpha  0.7595929
## F5.Pain.alpha     0.5514775
## Total.alpha       0.7606410



  • Conclusion: Overall, our factors have a Standardized Cronbach’s Alpha of 0.761, which is considered very good. Three of our factors – F1.Respect, F2.TactileTraining, F4.DialExtenders – also have CA’s above 0.70. However, two factors – F3.ErgoTraining and F5.Pain/Injury – have CA’s of 0.52 and 0.55, respectively. Removing variables from the factor do not raise the CA’s appreciably, according to the function output (not displayed). Is this cause for concern?



  • Here’s what a 9-Factor Varimax Rotated Factor Solution produces. (Remember, this was the default number of factors suggested by R.)


FACTANAL.VARIMAX <- factanal( X, factors=9, scores = c("regression"), rotation = "varimax", cutoff=0.3)
print(FACTANAL.VARIMAX$loadings,sort=TRUE, cutoff = .3)
## 
## Loadings:
##                        Factor1 Factor2 Factor3 Factor4 Factor5 Factor6 Factor7
## TRNG_INFORMAL           0.595                                                 
## TRNG_POSTURAL           0.566                                                 
## TRNG_TACTILE_MALES              0.936                                         
## TRNG_TACTILE_FEMALES            0.882                                         
## TRNG_DIALEXT                            0.673                                 
## EQUIP_DIALEXT_AVAIL                     0.814                                 
## EQUIP_DIALEXT_ENC                       0.670                                 
## RESPECT_NURSES                                  0.912                         
## RESPECT_TECHS                                   0.710                         
## PAIN_NECKSH                                             0.522                 
## PAIN_LEG                                                0.719                 
## RESPECT_ESTAFF                                  0.404           0.577         
## RESPECT_ANES                                                    0.575         
## RESPECT_GIATT                                                   0.501         
## EQUIP_LAPRON_AVAIL                                                      0.707 
## EQUIP_APRONS_CT                                                         0.742 
## TRNG_MONITOR            0.431                                                 
## EQUIP_MONITORS_EASYADJ                                                        
## TRNG_FORMAL             0.306                                                 
## TRNG_BEDHT              0.421                                                 
## TRNG_BEDANG             0.319                                                 
## TRNG_MANEUVERS          0.432                                                 
## TRNG_EXERCISE                                                                 
## TRNG_PEDISCOPE                                                                
## TRNG_FEEDBACK           0.449   0.326                                         
## TRNG_OPTIM                                                                    
## TRNG_SENSITIVITY                                                              
## EQUIP_GLOVE_AVAIL                                                             
## EQUIP_TO_INFORMAL       0.344                                                 
## RESPECT_GPAINS                                                                
## RESPECT_FNAME                                                   0.354         
## PAIN_INJURY                                                                   
## PAIN_HAND                                                                     
## PAIN_BACK                                               0.381                 
## PAIN_FOOT                                               0.402                 
## ERGO_QUIZ               0.320                                                 
##                        Factor8 Factor9
## TRNG_INFORMAL                         
## TRNG_POSTURAL                         
## TRNG_TACTILE_MALES                    
## TRNG_TACTILE_FEMALES                  
## TRNG_DIALEXT                          
## EQUIP_DIALEXT_AVAIL                   
## EQUIP_DIALEXT_ENC                     
## RESPECT_NURSES                        
## RESPECT_TECHS                         
## PAIN_NECKSH                           
## PAIN_LEG                              
## RESPECT_ESTAFF                  0.375 
## RESPECT_ANES                          
## RESPECT_GIATT                         
## EQUIP_LAPRON_AVAIL                    
## EQUIP_APRONS_CT                       
## TRNG_MONITOR                    0.525 
## EQUIP_MONITORS_EASYADJ          0.615 
## TRNG_FORMAL                           
## TRNG_BEDHT                            
## TRNG_BEDANG             0.307         
## TRNG_MANEUVERS                        
## TRNG_EXERCISE                         
## TRNG_PEDISCOPE                        
## TRNG_FEEDBACK           0.302         
## TRNG_OPTIM              0.318         
## TRNG_SENSITIVITY        0.348         
## EQUIP_GLOVE_AVAIL       0.361         
## EQUIP_TO_INFORMAL                     
## RESPECT_GPAINS          0.413         
## RESPECT_FNAME                         
## PAIN_INJURY                           
## PAIN_HAND                             
## PAIN_BACK                             
## PAIN_FOOT                             
## ERGO_QUIZ                             
## 
##                Factor1 Factor2 Factor3 Factor4 Factor5 Factor6 Factor7 Factor8
## SS loadings      2.216   1.957   1.854   1.699   1.527   1.315   1.310   1.125
## Proportion Var   0.062   0.054   0.051   0.047   0.042   0.037   0.036   0.031
## Cumulative Var   0.062   0.116   0.167   0.215   0.257   0.294   0.330   0.361
##                Factor9
## SS loadings      1.111
## Proportion Var   0.031
## Cumulative Var   0.392
FACTANAL.VARIMAX9 <- cbind( FACTOR_BASE, FACTANAL.VARIMAX$scores ) %>% 
  rename( F1.ERGOTRNG = Factor1,
          F2.TACTTRNG = Factor2,
          F3.DIAL.EXT = Factor3,
          F4.RESPECT.JRSTAFF = Factor4,
          F5.PAIN = Factor5,
          F6.RESPECT.SRSTAFF = Factor6,
          F7.APRONS.FIT.AVAIL = Factor7,
          F8.TRNG.SENSITIVITY = Factor8,
          F9.MONITORS = Factor9) %>% 
          mutate(TRAINING_LEVEL =  factor(TRAINING_LEVEL, ordered = F),
                 TRAINING_LEVEL = relevel(TRAINING_LEVEL, ref= "Advanced"))


Y <-
  FACTANAL.VARIMAX9 %>% 
  pivot_longer( cols= F1.ERGOTRNG : F9.MONITORS,
                names_to = "FACTOR",
                values_to = "FACTOR.SCORES")  %>% select( RECORD_ID, BIRTHSEX:HTCAT, FACTOR, FACTOR.SCORES)


grouped_ggbetweenstats(
  data = Y,
  x = BIRTHSEX,
  y = FACTOR.SCORES,
  grouping.var = FACTOR )

#Simple Linear Regressions to see whether significance survives controlling for Race (binary values) & Training Level

frot9.lm5 <- lm(F5.PAIN ~ BIRTHSEX + TRAINING_LEVEL + RACE2, data = FACTANAL.VARIMAX9 )
frot9.lm6 <- lm(F6.RESPECT.SRSTAFF ~ BIRTHSEX + TRAINING_LEVEL + RACE2 , data = FACTANAL.VARIMAX9)
frot9.lm7 <- lm(F7.APRONS.FIT.AVAIL ~ BIRTHSEX + TRAINING_LEVEL + RACE2, data = FACTANAL.VARIMAX9)
frot9.lm8 <- lm( F8.TRNG.SENSITIVITY ~ BIRTHSEX + TRAINING_LEVEL + RACE2 , data = FACTANAL.VARIMAX9 )
frot9.lm6h <- lm( F6.RESPECT.SRSTAFF ~ HTCAT + TRAINING_LEVEL + RACE2 , data = FACTANAL.VARIMAX9 )

tab_model( frot9.lm5,
           frot9.lm6,
           frot9.lm7,
           frot9.lm8,
           frot9.lm6h,
           show.ci=F, show.se=T)
  F5.PAIN F6.RESPECT.SRSTAFF F7.APRONS.FIT.AVAIL F8.TRNG.SENSITIVITY F6.RESPECT.SRSTAFF
Predictors Estimates std. Error p Estimates std. Error p Estimates std. Error p Estimates std. Error p Estimates std. Error p
(Intercept) 0.23 0.25 0.354 0.02 0.24 0.942 0.18 0.25 0.476 -0.14 0.22 0.525 -0.07 0.24 0.778
BIRTHSEX [M] -0.22 0.12 0.061 0.27 0.11 0.015 0.34 0.11 0.004 0.38 0.10 <0.001
TRAINING LEVEL [First
Year]
-0.29 0.24 0.230 -0.08 0.23 0.723 -0.60 0.24 0.014 -0.06 0.22 0.766 -0.10 0.23 0.664
TRAINING LEVEL [Second
Year]
-0.15 0.24 0.538 -0.14 0.23 0.534 -0.29 0.24 0.230 -0.19 0.22 0.391 -0.10 0.23 0.651
TRAINING LEVEL [Third
Year]
-0.30 0.25 0.226 -0.11 0.24 0.654 -0.27 0.24 0.277 -0.18 0.22 0.421 -0.10 0.23 0.656
RACE2 [NON-WHITE] 0.20 0.12 0.080 -0.10 0.11 0.359 0.02 0.11 0.834 0.13 0.10 0.218 -0.10 0.11 0.370
HTCAT [1] 0.37 0.11 0.001
Observations 225 225 225 225 225
R2 / R2 adjusted 0.051 / 0.029 0.040 / 0.018 0.088 / 0.067 0.066 / 0.044 0.061 / 0.039



  • Conclusion: Rotated factor solutions do not load RESPECT as highly as the unrotated solution we initially selected. Furthermore, rotated solutions with more than five factors seem to load respect-oriented elements highly onto two separate factors that can loosely be seen as RESPECT-JUNIOR STAFF (nurses, techs) and RESPECT-SR STAFF (GI Attending, Anesthesiologist), with only the later have statistical significance when regressed onto BIRTHSEX in a linear model.

  • Four of our factors – F5.Pain, F6.Respect.SrStaff, F7.Aprons.Fit.Avail, F8.Training.Sensitivity – all showed gender difference, and with the exception of F5.Pain, BIRTHSEX maintained its significance in linear regression models after controlling for Training Level (ref level = “Advanced”) and Race (binary, ref=White vs. Non-White). The TRAINING_LEVEL variable seemed to knock the F5.Pain factor out of significance, but only barely (p=0.0610). The HTCAT variable also showed statistical significance in our fully-adjusted model with outcome factor F6.RESEPECT.SRSTAFF, which is similar to what we’ve seen in other analyses here: specifically, that height and sex seem to behave almost identically in various models where RESPECT is the outcome of interest. And in many cases, height produces a stronger model, based on the R2 score.





Question 6: What happens if we use Principal Component Analysis instead as a means of data reduction?

  • Confirmatory Factor Analysis attempts to reduce a set of variables by finding the latent variables observed in your data. In this survey, we’d essentially created questions that dealt with Ergonomics, Respect, Tactile Training, Specialty Equipment use and availability, and our hope was that CFA would load our variables onto factors that match the categories already inherent in our data. (The five-factor unrotated solution above essentially did that.)

  • Principal Component Analysis approaches data reduction slightly differently: It aggregates related variables to create composite variables that might not have be obvious initially in our data.

  • Here, the psych package is used to create a 7-component solution with a quartimax rotation applied. This solution explains 45% of the variability in our data.

  • The resultant scores are then examined to see whether there are any differences noted along certain demographic variables, chief among them, BIRTHSEX.



  require(GPArotation)
## Loading required package: GPArotation
  #call for principal from psych package
  #nfactors specifies the  number of COMPONENTS to extract/display, despite its name
  
  PRINCOMP_FINAL <- psych::principal( X, rotate= "quartimax", nfactors =7, scores=TRUE)
  
  summary(PRINCOMP_FINAL)
## 
## Factor analysis with Call: psych::principal(r = X, nfactors = 7, rotate = "quartimax", scores = TRUE)
## 
## Test of the hypothesis that 7 factors are sufficient.
## The degrees of freedom for the model is 399  and the objective function was  3.51 
## The number of observations was  225  with Chi Square =  723.95  with prob <  3.9e-21 
## 
## The root mean square of the residuals (RMSA) is  0.06
  print(PRINCOMP_FINAL$values)
##  [1] 4.2771370 2.7111582 2.4569741 1.9384490 1.8619515 1.5690384 1.4744441
##  [8] 1.4089819 1.2774367 1.2245165 1.1189824 1.0374800 0.9826368 0.9193905
## [15] 0.8908376 0.8352843 0.7843136 0.7744779 0.7220664 0.6916836 0.6864547
## [22] 0.6353313 0.5910143 0.5860888 0.5782687 0.5402646 0.4960308 0.4755599
## [29] 0.4406524 0.3926614 0.3669384 0.3415652 0.2925285 0.2777567 0.2195029
## [36] 0.1221410
  print(PRINCOMP_FINAL$loadings)
## 
## Loadings:
##                        RC2    RC1    RC3    RC4    RC5    RC6    RC7   
## TRNG_FORMAL                    0.306                       0.358       
## TRNG_INFORMAL                  0.662               -0.175        -0.166
## TRNG_POSTURAL          -0.117  0.675 -0.151                       0.122
## TRNG_BEDHT                     0.529         0.142 -0.142        -0.186
## TRNG_BEDANG             0.275  0.348         0.165         0.180  0.275
## TRNG_MONITOR            0.229  0.550  0.202 -0.132                     
## TRNG_MANEUVERS          0.133  0.442         0.226         0.208  0.130
## TRNG_EXERCISE                  0.237  0.419  0.170  0.227  0.139       
## TRNG_DIALEXT            0.108         0.785               -0.149       
## TRNG_PEDISCOPE                 0.319  0.378                0.159       
## TRNG_FEEDBACK                  0.492         0.457                0.313
## TRNG_OPTIM              0.127                0.241 -0.254  0.149  0.432
## TRNG_TACTILE_MALES                           0.916                     
## TRNG_TACTILE_FEMALES                         0.907                     
## TRNG_SENSITIVITY               0.264  0.207  0.173 -0.146  0.250  0.461
## EQUIP_GLOVE_AVAIL       0.239               -0.196         0.230  0.430
## EQUIP_DIALEXT_AVAIL                   0.808                            
## EQUIP_DIALEXT_ENC                     0.703  0.201                     
## EQUIP_LAPRON_AVAIL                   -0.133                0.797       
## EQUIP_TO_INFORMAL       0.284  0.387                0.126         0.247
## EQUIP_MONITORS_EASYADJ  0.172  0.229  0.145        -0.107  0.200  0.255
## RESPECT_NURSES          0.792                                          
## RESPECT_TECHS           0.756                              0.115       
## RESPECT_ESTAFF          0.739                                          
## RESPECT_ANES            0.589         0.104        -0.156              
## RESPECT_GIATT           0.297  0.125               -0.372              
## RESPECT_GPAINS                 0.177 -0.125 -0.165 -0.236         0.524
## RESPECT_FNAME           0.491  0.162                              0.122
## PAIN_INJURY                    0.184                0.354              
## PAIN_HAND                      0.158 -0.214         0.288 -0.180 -0.338
## PAIN_NECKSH                           0.147  0.105  0.607        -0.251
## PAIN_BACK                                           0.534  0.115 -0.139
## PAIN_LEG               -0.102 -0.114        -0.139  0.719              
## PAIN_FOOT                             0.105         0.558         0.130
## ERGO_QUIZ                      0.251  0.113        -0.156  0.345 -0.471
## EQUIP_APRONS_CT         0.111        -0.118  0.105         0.783       
## 
##                  RC2   RC1   RC3   RC4   RC5   RC6   RC7
## SS loadings    2.856 2.815 2.398 2.299 2.203 1.959 1.759
## Proportion Var 0.079 0.078 0.067 0.064 0.061 0.054 0.049
## Cumulative Var 0.079 0.158 0.224 0.288 0.349 0.404 0.452
  PRINCOMP7.ROTQUART <- cbind( FACTOR_BASE, PRINCOMP_FINAL$scores[, 1:7])  %>% 
  
      rename( RC1.RESPECT = RC2,
              RC2.ERGO.TRAINING = RC1,
              RC3.SPEC.EQUIP = RC3,
              RC4.TACTILE.INST = RC4,
              RC5.PAIN  = RC5,
              RC6.APRONS = RC6,
              RC7.TRNG.SENSITIVITY.OPTIM = RC7)  %>% 
          mutate(TRAINING_LEVEL =  factor(TRAINING_LEVEL, ordered = F),
                 TRAINING_LEVEL = relevel(TRAINING_LEVEL, ref= "Advanced"))
  
  
  P <-
    PRINCOMP7.ROTQUART %>% 
    pivot_longer( cols= RC1.RESPECT : RC7.TRNG.SENSITIVITY.OPTIM,
                  names_to = "PCOMP",
                  values_to = "RC.SCORES")  %>% select( RECORD_ID, BIRTHSEX:HTCAT, PCOMP, RC.SCORES)
  
  
  #Confirm that final components are uncorrelated/orthogonal
  
  pairs.panels( PRINCOMP7.ROTQUART[, c(44:50)] , pch=21, stars=T) 

  # Examine Sex differences along each Principal Component
  
  grouped_ggbetweenstats(
    data = P,
    x = BIRTHSEX,
    y = RC.SCORES,
    grouping.var = PCOMP )


#Simple Linear Regressions to see whether significance survives controlling for Race (binary values) & Training Level

pcrot7.lm1 <- lm(RC1.RESPECT ~ BIRTHSEX + TRAINING_LEVEL + RACE2, data = PRINCOMP7.ROTQUART )
pcrot7.lm5 <- lm(RC5.PAIN ~ BIRTHSEX + TRAINING_LEVEL + RACE2, data = PRINCOMP7.ROTQUART)
pcrot7.lm6 <- lm(RC6.APRONS ~ BIRTHSEX + TRAINING_LEVEL + RACE2, data = PRINCOMP7.ROTQUART)
pcrot7.lm7 <- lm(RC7.TRNG.SENSITIVITY.OPTIM ~ BIRTHSEX + TRAINING_LEVEL + RACE2, data = PRINCOMP7.ROTQUART)
pcrot7.lm1h <- lm( RC1.RESPECT ~ HTCAT + TRAINING_LEVEL + RACE2 , data = PRINCOMP7.ROTQUART )

tab_model( pcrot7.lm1,
           pcrot7.lm5,
           pcrot7.lm6,
           pcrot7.lm7,
           pcrot7.lm1h, show.ci=F, show.se=T)
  RC1.RESPECT RC5.PAIN RC6.APRONS RC7.TRNG.SENSITIVITY.OPTIM RC1.RESPECT
Predictors Estimates std. Error p Estimates std. Error p Estimates std. Error p Estimates std. Error p Estimates std. Error p
(Intercept) -0.05 0.30 0.863 0.21 0.29 0.472 0.06 0.29 0.842 -0.25 0.29 0.400 -0.11 0.30 0.720
BIRTHSEX [M] 0.27 0.14 0.052 -0.30 0.14 0.028 0.37 0.13 0.006 0.42 0.14 0.002
TRAINING LEVEL [First
Year]
-0.20 0.29 0.491 -0.34 0.29 0.237 -0.57 0.28 0.044 -0.04 0.29 0.888 -0.22 0.29 0.449
TRAINING LEVEL [Second
Year]
-0.13 0.29 0.665 -0.12 0.29 0.664 -0.18 0.28 0.522 -0.23 0.29 0.418 -0.09 0.29 0.749
TRAINING LEVEL [Third
Year]
0.05 0.29 0.853 -0.23 0.29 0.437 -0.10 0.29 0.725 -0.23 0.29 0.438 0.06 0.29 0.844
RACE2 [NON-WHITE] 0.00 0.14 0.991 0.29 0.14 0.032 0.04 0.13 0.753 0.31 0.14 0.022 -0.00 0.14 0.996
HTCAT [1] 0.33 0.14 0.019
Observations 225 225 225 225 225
R2 / R2 adjusted 0.033 / 0.011 0.064 / 0.042 0.086 / 0.065 0.062 / 0.041 0.041 / 0.019


  • Conclusion: Four of our seven rotated components exhibited significant differences along the BIRTHSEX variable: Component1-Respect, Component5-Pain/Injury, Component6-Aprons, and Component7-Traning with Sensitivity & Optimization. Moreover, when modeled in simple linear regressions and controlled for both RACE and TRAINING_LEVEL, BIRTHSEX remained significant against three of these rotated components as outcome variables. (Component1-Respect was almost significant at p= 0.052. The TRAINING_LEVEL covariate knocked this component out of significance.)

  • Similar to what was observed during our CFA analysis, the HTCAT variable (which divides the sample into those above and below the 5’7” cutoff) behaves almost identically on the Respect component to BIRTHSEX In fact, in the fully adjusted model (shown), it actually retains its significance.